diff --git a/.github/workflows/test-tuto.yml b/.github/workflows/test-tuto.yml new file mode 100644 index 0000000..4cee1bc --- /dev/null +++ b/.github/workflows/test-tuto.yml @@ -0,0 +1,123 @@ +name: Build Archives (CI Only) + +on: + push: + branches: + - '**' + +jobs: + build_archives: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Prepare template files + run: | + python ./scripts/prepare_template.py + + - name: Prepare archives + run: | + python ./scripts/prepare_archives.py + + - name: Upload archives as artifacts + uses: actions/upload-artifact@v4 + with: + name: tutorial-archives + path: | + docs/data/gegelati-tutorial.zip + docs/data/gegelati-tutorial-solution.zip + docs/data/gegelati-tutorial-strengthening-solution.zip + docs/data/gegelati-tutorial-parallel-solution.zip + + test_archives: + needs: build_archives + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + archive: + - gegelati-tutorial.zip + - gegelati-tutorial-solution.zip + - gegelati-tutorial-strengthening-solution.zip + - gegelati-tutorial-parallel-solution.zip + os: [ubuntu-latest, windows-latest] ##, windows-latest, macos-latest] + compiler: [gcc, msvc] ##, clang, msvc] + exclude: + # Exclude MSVC on non-Windows + - os: ubuntu-latest + compiler: msvc + ## - os: macos-latest + ## compiler: msvc + # Exclude GCC and Clang on Windows (unless you want to setup MinGW/LLVM) + - os: windows-latest + compiler: gcc + ## - os: windows-latest + ## compiler: clang + + name: Test ${{ matrix.archive }} on ${{ matrix.os }} with ${{ matrix.compiler }} + steps: + - uses: actions/checkout@v3 + + - name: Download archives artifact + uses: actions/download-artifact@v4 + with: + name: tutorial-archives + path: archives + + - name: Unzip archive + run: | + unzip -q archives/${{ matrix.archive }} -d tutorial + shell: bash + + - name: Set up compiler + if: matrix.compiler == 'gcc' + uses: egor-tensin/setup-gcc@v1 + with: + version: latest + # GCC is default on Linux, but 
this ensures it's available + + - name: Set up Clang + if: matrix.compiler == 'clang' + uses: egor-tensin/setup-clang@v1 + with: + version: latest + + - name: Set up MSVC + if: matrix.compiler == 'msvc' && runner.os == 'Windows' + uses: ilammy/msvc-dev-cmd@v1 + + - name: Install Libraries (Linux) + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt install -y libsdl2-dev libsdl2-image-dev libsdl2-ttf-dev + + - name: Build Gegelati (Linux/MacOS) + if: matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' + run: | + git clone -b master https://github.com/gegelati/gegelati.git lib/gegelati + cd lib/gegelati/bin + cmake .. -DBUILD_TESTING=OFF -DSKIP_DOXYGEN_BUILD=ON -DCMAKE_BUILD_TYPE=Release + sudo cmake --build . --target install --parallel $(nproc) + shell: bash + + - name: Configure CMake project + run: | + mkdir -p tutorial/build + cmake -S tutorial/gegelati-tutorial -B build -DTESTING=ON + env: + CC: ${{ matrix.compiler == 'gcc' && 'gcc' || matrix.compiler == 'clang' && 'clang' || '' }} + CXX: ${{ matrix.compiler == 'gcc' && 'g++' || matrix.compiler == 'clang' && 'clang++' || '' }} + shell: bash + + - name: Build manual-control target + run: | + cmake --build build --config Release --target manual-control --parallel $(nproc) + shell: bash + + - name: Build and run tpg-training target + if: matrix.archive != 'gegelati-tutorial.zip' + run: | + sed -i 's/"nbGenerations": [0-9]*/"nbGenerations": 4/' tutorial/gegelati-tutorial/params.json + cmake --build build --config Release --target tpg-training --parallel $(nproc) + cd build && ./Release/tpg-training # cd needed for windows dll + shell: bash \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d8b53a..53c9ec7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ endif() # Add definitions for testing purposes if(${TESTING}) MESSAGE("Testing mode") - add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2) + add_definitions(-DNO_CONSOLE_CONTROL -DNB_GENERATIONS=2 
-DDEACTIVATE_DISPLAY=1) endif() # ******************************************* @@ -97,7 +97,7 @@ add_executable(tpg-training ${pendulum_files} ${training_files}) target_link_libraries(tpg-training ${GEGELATI_LIBRARIES} ${SDL2_LIBRARY} ${SDL2_IMAGE_LIBRARY} ${SDL2TTF_LIBRARY}) target_compile_definitions(tpg-training PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}") -#ifdef SOLUTION +#ifdef SOLUTION_INFERENCE # Sub project for inference file(GLOB inference_files @@ -111,4 +111,4 @@ include_directories(${GEGELATI_INCLUDE_DIRS} ${SDL2_INCLUDE_DIR} ${SDL2_IMAGE_IN add_executable(tpg-inference ${pendulum_files} ${inference_files}) target_link_libraries(tpg-inference ${GEGELATI_LIBRARIES} ${SDL2_LIBRARY} ${SDL2_IMAGE_LIBRARY} ${SDL2TTF_LIBRARY}) target_compile_definitions(tpg-inference PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}") -#endif // SOLUTION \ No newline at end of file +#endif // SOLUTION_INFERENCE \ No newline at end of file diff --git a/docs/_pages/parallel_training.md b/docs/_pages/parallel_training.md new file mode 100644 index 0000000..c7f3a1f --- /dev/null +++ b/docs/_pages/parallel_training.md @@ -0,0 +1,134 @@ +--- +title: Parallel Training of Tangled Program Graphs +permalink: /tutos/parallel-training +toc: true +toc_sticky: true +--- + +The objective of this tutorial is to activate parallel training of Tangled Program Graphs (TPGs) with Gegelati by: +- instantiating a `ParallelLearningAgent`, and +- making the `PendulumWrapper` safely copyable so worker threads receive independent environments. + +The starting point of this tutorial is the C++ project obtained at the end of the _[GEGELATI introductory tutorial](/gegelati-tutorial)_. While completing the introductory tutorial is strongly advised, a copy of the project resulting from this tutorial can be downloaded at the following link: [pendulum_wrapper_solution.zip](/gegelati-tutorial/data/gegelati-tutorial-solution.zip). + +## Why make the environment copyable? 
+ +The learning process of TPGs involves two main time-consuming steps per generation: +- Evaluation of the fitness of each individual TPG root within the `PendulumWrapper` learning environment. This step takes time `T_eval` seconds at each generation in the printed log. +- Mutation of the TPG population. This step takes time `T_mutat` seconds at each generation in the printed log. + +When using a `LearningAgent`, both steps are performed sequentially on a single thread. To accelerate training, it is possible to parallelize these steps across multiple threads/cores by using `ParallelLearningAgent`. + +To better take note of the benefits of parallel training, keep a copy of the logs produced by the sequential training for comparison. + +An important feature of Gegelati is that the parallelization of training is fully deterministic, which means that running the same training with the same random seed will always produce the same results, regardless of the number of threads used. This is achieved by ensuring that each worker thread operates on its own independent copy of the learning environment. + +## 0. Parallelize mutations + +To enable parallel mutations, the sequential `LearningAgent` must be replaced with `ParallelLearningAgent`. By default, the number of threads is set to the number of available hardware threads on the machine. + +#### TODO 1: +Edit the `/gegelati-tutorial/src/training/main-training.cpp` by replacing the line that instantiates the `LearningAgent` with a line that instantiates a `ParallelLearningAgent`: + +{% details Solution to #1 (Click to expand) %} +```cpp +/* main-training.cpp */ +// Instantiate and initialize the Learning Agent (LA) +Learn::ParallelLearningAgent la(pendulumLE, instructionSet, params); +``` + +{% enddetails %} + +Build and run the `tpg-training` target of the project. You should observe that `T_mutat` times have slightly decreased compared to the sequential training log. 
Other columns related to the trained TPG characteristics (`NbVert`, `NbActR`, `NbTeamR`) and the fitness of agents (`Min`, `Avg`, `Max`) should remain identical to the sequential training. + +## 1. Parallelize evaluations + +To enable parallel evaluations, the `PendulumWrapper` must be made safely copyable. This is done first by implementing the copy constructor of the `PendulumWrapper` class, and then by overriding the `clone()` method inherited from the `LearningEnvironment` base class. + +#### TODO 2: +Edit the `/gegelati-tutorial/src/training/pendulum_wrapper.h` and `/gegelati-tutorial/src/training/pendulum_wrapper.cpp` to add a copy constructor `PendulumWrapper(const PendulumWrapper& other)` to the class. + +It is important to note that the default copy constructor generated by the compiler would perform a shallow copy of the member variables, which is not suitable in this case. Therefore, a custom copy constructor must be implemented to ensure that all member variables are properly duplicated. + +Special care should be taken to handle the `std::vector<Data::PointerWrapper<double>> data` attribute: it must be initialized as a copy of the `other.data` attribute. Then the pointers contained in the vector must be updated to point to the attributes of `this->pendulum`, and not to those of `other.pendulum`, as is the case right after copy-constructing the `data` attribute. 
+ + +{% details Solution to #2 (Click to expand) %} +```cpp +/* pendulum_wrapper.h */ +// Copy constructor +PendulumWrapper(const PendulumWrapper& other); +``` + +```cpp +/* pendulum_wrapper.cpp */ +// Copy constructor implementation +PendulumWrapper::PendulumWrapper(const PendulumWrapper& other) + : LearningEnvironment(other), // Call base class copy constructor + pendulum(other.pendulum), // Copy-construct the pendulum + data(other.data) // Copy-construct the data vector +{ + // Update pointers in data to point to this->pendulum's attributes + data.at(0).setPointer(&this->pendulum.getAngle()); + data.at(1).setPointer(&this->pendulum.getVelocity()); +} +``` + +{% enddetails %} + +#### TODO 3: +Next, override the `clone()` method in the `PendulumWrapper` class to return a new instance of `PendulumWrapper` created using the copy constructor. + +{% details Solution to #3 (Click to expand) %} +```cpp +/* pendulum_wrapper.h */ +// Override clone method +Learn::LearningEnvironment* clone() const override; +``` + +```cpp +/* pendulum_wrapper.cpp */ +// Override clone method implementation +Learn::LearningEnvironment* PendulumWrapper::clone() const { + return new PendulumWrapper(*this); // Use copy constructor +} +``` + +{% enddetails %} + + +#### TODO 4: +To signal to Gegelati that the `PendulumWrapper` can be safely copied for parallel evaluation, the `LearningEnvironment::isCopyable()` method must be overridden to return `true`. + +{% details Solution to #4 (Click to expand) %} +```cpp +/* pendulum_wrapper.h */ +// Override isCopyable method +bool isCopyable() const override; +``` + +```cpp +/* pendulum_wrapper.cpp */ +// Override isCopyable method implementation +bool PendulumWrapper::isCopyable() const { + return true; // Indicate that this environment is copyable +} +``` + +{% enddetails %} + +#### Test parallel evaluations +Build and run the `tpg-training` target of the project. 
You should observe that `T_eval` times have significantly decreased compared to the sequential training log. Other columns related to the trained TPG characteristics (`NbVert`, `NbActR`, `NbTeamR`) and the fitness of agents (`Min`, `Avg`, `Max`) should remain identical to the sequential training. + +It is possible to control the number of threads used by the `ParallelLearningAgent` by setting the `nbThreads` parameter in the `/gegelati-tutorial/params.json` file as follows: + +```json +"nbThreads": 4, +``` + +## Conclusion +In this tutorial, you have successfully enabled parallel training of Tangled Program Graphs (TPGs) in Gegelati by replacing the sequential `LearningAgent` with `ParallelLearningAgent` and making the `PendulumWrapper` safely copyable. + +More information about parallel training with Gegelati can be found in the following publication: + +[_K. Desnos, N. Sourbier, P.-Y. Raumer, O. Gesny and M. Pelcat. GEGELATI: Lightweight Artificial Intelligence through Generic and Evolvable Tangled Program Graphs. In Workshop on Design and Architectures for Signal and Image Processing (DASIP), ACM, 2021_](https://arxiv.org/pdf/2012.08296) \ No newline at end of file diff --git a/docs/_pages/strengthening_agents.md b/docs/_pages/strengthening_agents.md new file mode 100644 index 0000000..9af18da --- /dev/null +++ b/docs/_pages/strengthening_agents.md @@ -0,0 +1,91 @@ +--- +title: Strengthening Reinforcement Learning Agents with Multi-Episode Evaluation and Validation Phases +permalink: /tutos/strengthening-agents +toc: true +toc_sticky: true +--- + +The objective of this tutorial is two-fold: +1. Strengthen the built reinforcement learning agents by evaluating them over multiple episodes during training, and +2. Activate a validation phase at the end of each generation to monitor potential overfitting. + +The starting point of this tutorial is the C++ project obtained at the end of the _[GEGELATI introductory tutorial](/gegelati-tutorial)_. 
While completing the introductory tutorial is strongly advised, a copy of the project resulting from this tutorial can be downloaded at the following link: [pendulum_wrapper_solution.zip](/gegelati-tutorial/data/gegelati-tutorial-solution.zip). + +## Multi-episode evaluation setup +### Why evaluate over multiple episodes? +An episode refers to a complete sequence of interactions between a reinforcement learning agent and its environment, starting from an initial state and ending when a terminal condition is met. For example, in the initial tutorial, an episode consists of the agent attempting to balance the pendulum for a fixed duration of 1500 time steps, as defined by the `maxNbActionsPerEval` parameter in `params.json`. + +In reinforcement learning, evaluating an agent's performance over multiple episodes is crucial for obtaining a reliable evaluation of its true capabilities. This is because the performance of an agent can vary significantly from one episode to another due to the inherent stochasticity of the environment and the agent's policy. By averaging the results over multiple episodes, we can mitigate the effects of randomness and strengthen the robustness of the learned policy. + +Implementing multi-episode evaluation in Gegelati involves modifying the `PendulumWrapper` class to support multiple episodes during the evaluation phase. To vary the starting conditions of each episode, the pendulum's angle and angular velocity will be randomly initialized at the beginning of each episode. + +### 0. Modify PendulumWrapper to support multi-episode evaluation +To implement multi-episode evaluation, we will first modify the `PendulumWrapper` class to support a stochastic reset of the pendulum's state at the beginning of each episode. + +To support random initialization, we will use a pseudo-random number generator to generate random values for the pendulum's angle and angular velocity within specified ranges. 
+ +#### TODO 1: +Edit the `/gegelati-tutorial/src/training/pendulum_wrapper.h` file to add a random number generator as a member variable of the `PendulumWrapper` class. This pseudo-random number generator is provided in Gegelati with the `Mutator::RNG` class. + +{% details Solution to #1 (Click to expand) %} +```cpp +/* pendulum_wrapper.h */ +class PendulumWrapper : public Learn::LearningEnvironment { +public: + // Existing code... + + /// Random Number Generator for the environment + Mutator::RNG rng; +``` + +{% enddetails %} + +#### TODO 2: +Next, we will modify the `reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber)` method of the `PendulumWrapper` class to randomly initialize the pendulum's angle and angular velocity at the beginning of each episode. + +When calling the `reset(...)` method of the environment, Gegelati notably provides a `seed` parameter that can be used to seed the environment random number generator, using the `Mutator::RNG::setSeed()` method. Using this seeding mechanism ensures deterministic reproducibility of the random initialization across different runs. + +Once the RNG is seeded, we will use the `Mutator::RNG::getDouble(double min, double max)` method to generate random values within specified ranges. For example, we can set the angle to be randomly initialized between -π and π radians, and the angular velocity to be randomly initialized between -1.0 and 1.0 radians per second. 
+ +{% details Solution to #2 (Click to expand) %} +The reset method must be modified as follows: +```cpp +/* pendulum_wrapper.cpp */ +void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber) { + // Seed the RNG differently for each iteration + this->rng.setSeed(seed); + + // Randomize the initial angle between [-pi, pi] + double initialAngle = this->rng.getDouble(-M_PI, M_PI); + this->pendulum.setAngle(initialAngle); + // Randomize the initial velocity between [-1.0, 1.0] + double initialVelocity = this->rng.getDouble(-1.0, 1.0); + this->pendulum.setVelocity(initialVelocity); +} +``` + +{% enddetails %} + +### 1. Configure multi-episode evaluation in params.json +To enable multi-episode evaluation during training, we need to modify the training parameters in the `params.json` file of the project. + +#### TODO 3: +Edit the `/gegelati-tutorial/params.json` file to set the `nbIterationsPerPolicyEvaluation` parameter to a value greater than 1. This parameter specifies the number of episodes over which each agent will be evaluated during training. For this tutorial, set it to 5. + +{% details Solution to #3 (Click to expand) %} +```json +{ + // Existing parameters... + "nbIterationsPerPolicyEvaluation": 5, + // Existing parameters... +} +``` + +{% enddetails %} + +## Conclusion +In this tutorial, you have successfully enabled multi-episode evaluation for reinforcement learning agents in Gegelati. By evaluating agents over multiple episodes, you have strengthened the robustness of the learned policy and mitigated the effects of randomness in the environment. + +More information about reinforcement learning with Gegelati can be found in the following publication: + +[_K. Desnos, N. Sourbier, P.-Y. Raumer, O. Gesny and M. Pelcat. GEGELATI: Lightweight Artificial Intelligence through Generic and Evolvable Tangled Program Graphs. 
In Workshop on Design and Architectures for Signal and Image Processing (DASIP), ACM, 2021_](https://arxiv.org/pdf/2012.08296) \ No newline at end of file diff --git a/params.json b/params.json index 182c53b..7fd7e34 100644 --- a/params.json +++ b/params.json @@ -1,113 +1,36 @@ { - // Number of recordings held in the Archive. - // "archiveSize" : 50, // Default value - "archiveSize": 2000, - // Probability of archiving the result of each Program execution. - // "archivingProbability" : 0.05, // Default value - "archivingProbability": 0.01, + // #ifdef SOLUTION_STRENGTHENING // Boolean used to activate an evaluation of the surviving roots in validation // mode after the training at each generation. // "doValidation" : false, // Default value - "doValidation": false, + "doValidation": true, + // #endif // SOLUTION_STRENGTHENING // Maximum number of actions performed on the learning environment during the // each evaluation of a root. // "maxNbActionsPerEval" : 1000, // Default value "maxNbActionsPerEval": 1500, + // #ifdef SOLUTION_STRENGTHENING // Maximum number of times a given root is evaluated.After this number is // reached, possibly after several generations, the score of the root will be // fixed, and no further evaluation will be done. // "maxNbEvaluationPerPolicy" : 1000, // Default value "maxNbEvaluationPerPolicy": 10, + // #endif // SOLUTION_STRENGTHENING "mutation": { - "prog": { - // Maximum constant value possible. - // "maxConstValue" : 100, // Default value - "maxConstValue": 10, - // Maximum number of Line within the Program of the TPG. - // "maxProgramSize" : 96, // Default value - "maxProgramSize": 20, - // Minimum constant value possible. - // "minConstValue" : -10, // Default value - "minConstValue": -10, - // Probability of inserting a line in the Program. - // "pAdd" : 0.5, // Default value - "pAdd": 0.5, - // Probability of each constant to be mutated. 
- // "pConstantMutation" : 0.5, // Default value - "pConstantMutation": 0.5, - // Probability of deleting a line of the Program. - // "pDelete" : 0.5, // Default value - "pDelete": 0.5, - // Probability of altering a line of the Program. - // "pMutate" : 1.0, // Default value - "pMutate": 1.0, - // Probability of creating a new program. - // "pNewProgram" : 0.0, // Default value - "pNewProgram": 0.0, - // Probability of swapping two lines of the Program. - // "pSwap" : 1.0, // Default value - "pSwap": 1.0 - }, "tpg": { - // When a Program is mutated, makes sure its behavior is no longer the same. - // "forceProgramBehaviorChangeOnMutation" : false, // Default value - "forceProgramBehaviorChangeOnMutation": false, - // Number of root TPGTeams at the initialisation of a TPGGraph. - // If 0, if will be init to the number of surviving roots - // "nbRoots" : 0, // Default value - "initNbRoots": 0, - // Maximum number of TPGEdge connected to each TPGTeam of the TPGGraph when - // initialized. - // "maxInitOutgoingEdges" : 3, // Default value - "maxInitOutgoingEdges": 3, - // Maximum number of outgoing edge during TPGGraph mutations. - // "maxOutgoingEdges" : 5, // Default value - "maxOutgoingEdges": 5, // Number of root TPGTeams to maintain when populating the TPGGraph // "nbRoots" : 100, // Default value - "nbRoots": 150, - // Probability of adding an outgoing Edge to a Team. - // "pEdgeAddition" : 0.7, // Default value - "pEdgeAddition": 0.7, - // Probability of deleting an outgoing Edge of a Team. - // "pEdgeDeletion" : 0.7, // Default value - "pEdgeDeletion": 0.7, - // Probability of changing the destination of an Edge. - // "pEdgeDestinationChange" : 0.1, // Default value - "pEdgeDestinationChange": 0.1, - // Probability of the new destination of an Edge to be an Action. - // "pEdgeDestinationIsAction" : 0.5, // Default value - "pEdgeDestinationIsAction": 0.5, - // Probability of mutating the Program of an outgoing Edge. 
- // "pProgramMutation" : 0.2, // Default value - "pProgramMutation": 0.2 + "nbRoots": 150 } }, // Number of generations of the training. // "nbGenerations" : 500, // Default value "nbGenerations": 1200, - // [Only used in AdversarialLearningAgent.] - // Number of times each job is evaluated in the learning process. - // Each root may belong to several jobs, hence this parameter should be lower - // than the nbIterationsPerPolicyEvaluation parameter. - // "nbIterationsPerJob" : 1, // Default value - "nbIterationsPerJob": 1, // Number of evaluation of each root per generation. // "nbIterationsPerPolicyEvaluation" : 5, // Default value + //#ifdef SOLUTION_STRENGTHENING + "nbIterationsPerPolicyEvaluation": 5, + //#else // SOLUTION_STRENGTHENING "nbIterationsPerPolicyEvaluation": 1, - // Number of Constant available in each Program. - // "nbProgramConstant" : 0, // Default value - "nbProgramConstant": 0, - // Number of registers for the Program execution. - // "nbRegisters" : 8, // Default value - "nbRegisters": 8, - // [Only used in ParallelLearningAgent and child classes.] - // Number of threads used for the training process. - // When undefined in the json file, this parameter is automatically set to the - // number of cores of the CPU. - // /* "nbThreads" : 0,*/ // Commented by default - /* "nbThreads" : 0,*/ - // Percentage of deleted (and regenerated) root TPGVertex at each generation. 
- // "ratioDeletedRoots" : 0.5, // Default value - "ratioDeletedRoots": 0.85 + //#endif // SOLUTION_STRENGTHENING } \ No newline at end of file diff --git a/scripts/prepare_archives.py b/scripts/prepare_archives.py index 9ceeee9..b129576 100644 --- a/scripts/prepare_archives.py +++ b/scripts/prepare_archives.py @@ -38,21 +38,40 @@ def zipFilesInDir(dirName, zipObj, regex, parentName="", withSubdirectories = Tr # Add file to zip zipObj.write(filePath, parentName + filePath) +def replace_file_in_zip(zip_path, file_to_add, arcname): + """ + Replace a file in a zip archive by first removing the existing file (if present), + then adding the new file. + """ + import tempfile + # Create a temporary zip file + tmpfd, tmpname = tempfile.mkstemp(suffix='.zip') + os.close(tmpfd) + with ZipFile(zip_path, 'r') as zin, ZipFile(tmpname, 'w') as zout: + for item in zin.infolist(): + if item.filename != arcname: + zout.writestr(item, zin.read(item.filename)) + # Now add the new file + zout.write(file_to_add, arcname) + # Replace the original zip with the modified one + shutil.move(tmpname, zip_path) # Create the tutorialTemplate archive mainFolder = "gegelati-tutorial/" tutorialTemplateArchive = ZipFile("./docs/data/gegelati-tutorial.zip", "w") zipFileAdd(tutorialTemplateArchive,"bin/", mainFolder) -zipFilesInDir("./",tutorialTemplateArchive, r'^(?!.*(CMakeLists))[^\.]+.*', mainFolder, False) # exclude .gitgnore and CMakeLists files +zipFilesInDir("./",tutorialTemplateArchive, r'^(?!.*(CMakeLists|params))[^\.]+.*', mainFolder, False) # exclude .gitgnore and CMakeLists files zipFilesInDir("./dat/",tutorialTemplateArchive, r'.*', mainFolder) zipFilesInDir("./lib/",tutorialTemplateArchive, r'.*', mainFolder) zipFilesInDir("src/",tutorialTemplateArchive, r'.*', mainFolder, False) zipFilesInDir("src/manual/",tutorialTemplateArchive, r'.*', mainFolder) -zipFilesInDir("src/training",tutorialTemplateArchive, r'^(?!.*(pendulum_wrapper))', mainFolder, False) # all files except 
pendulum_wrapper +zipFilesInDir("src/training",tutorialTemplateArchive, r'^(?!.*(pendulum_wrapper|main-training))', mainFolder, False) # all files except pendulum_wrapper or main-training tutorialTemplateArchive.write("src/training/pendulum_wrapper_empty.cpp", mainFolder + "src/training/pendulum_wrapper.cpp" ) # overwrite empty_file tutorialTemplateArchive.write("src/training/pendulum_wrapper_empty.h", mainFolder + "src/training/pendulum_wrapper.h") # overwrite empty_file +tutorialTemplateArchive.write("src/training/main-training_empty.cpp", mainFolder + "src/training/main-training.cpp") # overwrite empty_file tutorialTemplateArchive.write("CMakeLists_empty.txt", mainFolder + "CMakeLists.txt") # overwrite empty_file +tutorialTemplateArchive.write("params_empty.json", mainFolder + "params.json") # overwrite empty_file tutorialTemplateArchive.close() # Create the pendulum_wrapper_solution archive @@ -61,20 +80,27 @@ def zipFilesInDir(dirName, zipObj, regex, parentName="", withSubdirectories = Tr pendulumWrapperSolutionArchive.write("src/training/pendulum_wrapper_solution.h", "pendulum_wrapper.h") # overwrite empty_file pendulumWrapperSolutionArchive.close() -# Create the gegelati-tutorial-solution archive + +# Create the gegelati-tutorial-solution archive by copying the template and patching needed files +mainFolder = "gegelati-tutorial/" +shutil.copy2("./docs/data/gegelati-tutorial.zip", "./docs/data/gegelati-tutorial-solution.zip") +replace_file_in_zip("./docs/data/gegelati-tutorial-solution.zip", "src/training/pendulum_wrapper_solution.cpp", mainFolder + "src/training/pendulum_wrapper.cpp") +replace_file_in_zip("./docs/data/gegelati-tutorial-solution.zip", "src/training/pendulum_wrapper_solution.h", mainFolder + "src/training/pendulum_wrapper.h") + +# Create the gegelati-tutorial-strengthening-solution archive by copying the solution archive mainFolder = "gegelati-tutorial/" -tutorialSolutionArchive = ZipFile("./docs/data/gegelati-tutorial-solution.zip", "w") 
-zipFileAdd(tutorialSolutionArchive,"bin/", mainFolder) -zipFilesInDir("./",tutorialSolutionArchive, r'^(?!.*(CMakeLists))[^\.]+.*', mainFolder, False) # exclude .gitgnore and CMakeLists files -zipFilesInDir("./dat/",tutorialSolutionArchive, r'.*', mainFolder) -zipFilesInDir("./lib/",tutorialSolutionArchive, r'.*', mainFolder) -zipFilesInDir("src/",tutorialSolutionArchive, r'.*', mainFolder, False) -zipFilesInDir("src/manual/",tutorialSolutionArchive, r'.*', mainFolder) -zipFilesInDir("src/training",tutorialSolutionArchive, r'^(?!.*(pendulum_wrapper))', mainFolder, False) # all files except pendulum_wrapper -tutorialSolutionArchive.write("src/training/pendulum_wrapper_solution.cpp", mainFolder + "src/training/pendulum_wrapper.cpp" ) # overwrite empty_file -tutorialSolutionArchive.write("src/training/pendulum_wrapper_solution.h", mainFolder + "src/training/pendulum_wrapper.h") # overwrite empty_file -tutorialSolutionArchive.write("CMakeLists_empty.txt", mainFolder + "CMakeLists.txt") # overwrite empty_file -tutorialSolutionArchive.close() +shutil.copy2("./docs/data/gegelati-tutorial-solution.zip", "./docs/data/gegelati-tutorial-strengthening-solution.zip") +replace_file_in_zip("./docs/data/gegelati-tutorial-strengthening-solution.zip", "src/training/pendulum_wrapper_strengthening.cpp", mainFolder + "src/training/pendulum_wrapper.cpp") +replace_file_in_zip("./docs/data/gegelati-tutorial-strengthening-solution.zip", "src/training/pendulum_wrapper_strengthening.h", mainFolder + "src/training/pendulum_wrapper.h") +replace_file_in_zip("./docs/data/gegelati-tutorial-strengthening-solution.zip", "params_strengthening.json", mainFolder + "params.json") + +# Create the gegelati-tutorial-parallel-solution archive by copying the solution archive +mainFolder = "gegelati-tutorial/" +shutil.copy2("./docs/data/gegelati-tutorial-strengthening-solution.zip", "./docs/data/gegelati-tutorial-parallel-solution.zip") 
+replace_file_in_zip("./docs/data/gegelati-tutorial-parallel-solution.zip", "src/training/pendulum_wrapper_parallel.cpp", mainFolder + "src/training/pendulum_wrapper.cpp") +replace_file_in_zip("./docs/data/gegelati-tutorial-parallel-solution.zip", "src/training/pendulum_wrapper_parallel.h", mainFolder + "src/training/pendulum_wrapper.h") +replace_file_in_zip("./docs/data/gegelati-tutorial-parallel-solution.zip", "src/training/main-training_parallel.cpp", mainFolder + "src/training/main-training.cpp") + +# Make the main-inference.cpp file available for download +shutil.copy2("./src/inference/main-inference.cpp", "./docs/data/") -# Make the main-inference.cpp file available -shutil.copy2("./src/inference/main-inference.cpp", "./docs/data/") \ No newline at end of file diff --git a/scripts/prepare_template.py b/scripts/prepare_template.py index 33f4811..cf232e0 100644 --- a/scripts/prepare_template.py +++ b/scripts/prepare_template.py @@ -4,103 +4,134 @@ # License: CeCILL-C import re +import io # Function filtering the solution out of the input file. # also filters double empty lines that may result from filtering. 
def filterSolution(inputFile, outputFile, keepSolution, patterns):
    """Strip preprocessor-guarded blocks from a text stream.

    The text is rescanned once per entry of ``patterns``, in order; the
    output of one pass is the input of the next. During each pass:

    * any line containing ``#define SOLUTION`` is dropped;
    * ``#ifdef <pattern>``, ``#else // <pattern>`` and
      ``#endif // <pattern>`` directive lines that open, switch or close a
      tracked block are dropped;
    * inside a tracked block, the ``#ifdef`` branch is kept when
      ``keepSolution`` is true, otherwise the ``#else`` branch is kept;
    * among the emitted lines, runs of blank lines are collapsed to one.

    When ``patterns`` is empty no pass runs and the input is copied verbatim.

    Args:
        inputFile: Seekable text stream to read (it is rewound with
            ``seek(0)`` before being read).
        outputFile: Text stream receiving the filtered result.
        keepSolution: True to keep the ``#ifdef`` branch of matching blocks,
            False to keep their ``#else`` branch.
        patterns: Iterable of regular-expression fragments matched against
            the macro name that follows the directive.

    Note:
        A single "in else" flag is shared by the whole block stack, and an
        ``#ifdef`` met while that flag is set is not opened as a nested
        block; its directive lines are then treated like ordinary lines.
    """
    # Directive lines are matched with `$` rather than a literal newline so
    # that a directive sitting on the last line of a file without a trailing
    # newline is still recognised instead of leaking into the output.
    defineRegex = re.compile(r'.*#define SOLUTION')
    currentInput = inputFile

    for pattern in patterns:
        # Compile once per pass instead of once per line.
        ifdefRegex = re.compile(rf'.*#ifdef ({pattern})\s*$')
        elseRegex = re.compile(rf'.*#else // ({pattern})\s*$')
        endifRegex = re.compile(rf'.*#endif // ({pattern})\s*$')

        # In-memory buffer holding the result of this pass.
        tempOutput = io.StringIO()
        currentInput.seek(0)

        blockStack = []      # macro names of the currently open blocks
        inElseBlock = False  # True while in the `#else` part of a block
        emptyLine = False    # True when the last emitted line was blank

        for line in currentInput:
            # The macro definition itself never reaches the output.
            if defineRegex.match(line):
                continue

            # Opening directive (ignored while in an `#else` section).
            matchIfdef = ifdefRegex.match(line)
            if matchIfdef and not inElseBlock:
                blockStack.append(matchIfdef.group(1))
                continue

            # Switch to the alternative branch of the open block.
            if blockStack and elseRegex.match(line):
                inElseBlock = True
                continue

            # Closing directive: must name the innermost open block.
            matchEndif = endifRegex.match(line)
            if blockStack and matchEndif and blockStack[-1] == matchEndif.group(1):
                blockStack.pop()
                inElseBlock = False
                continue

            # Inside a block, drop the branch that was not requested:
            # the `#else` branch when keeping the solution, the `#ifdef`
            # branch otherwise.
            if blockStack and inElseBlock == bool(keepSolution):
                continue

            # Collapse consecutive blank lines left behind by the filtering.
            isBlank = not line.strip()
            if isBlank and emptyLine:
                continue
            emptyLine = isBlank
            tempOutput.write(line)

        # Release the previous intermediate buffer (never the caller's file).
        if currentInput is not inputFile:
            currentInput.close()

        # The result of this pass feeds the next one.
        tempOutput.seek(0)
        currentInput = tempOutput

    # Write the final result to the output file.
    currentInput.seek(0)
    outputFile.writelines(currentInput)
    if currentInput is not inputFile:
        currentInput.close()


# Files to filter
# Each entry: [inputPath, [[outputPath, patternsToRemove, patternsToKeep], ...]]
files = [
    ["./src/training/pendulum_wrapper.cpp", [
        ["./src/training/pendulum_wrapper_empty.cpp", ["SOLUTION","SOLUTION_.*"], []],
        ["./src/training/pendulum_wrapper_solution.cpp", ["SOLUTION_.*"], ["SOLUTION"]],
        ["./src/training/pendulum_wrapper_strengthening.cpp", ["SOLUTION_PARALLEL"], ["SOLUTION(_STRENGTHENING)*"]],
        ["./src/training/pendulum_wrapper_parallel.cpp", [], ["SOLUTION_(PARALLEL|STRENGTHENING)", "SOLUTION"]],
    ]],
    ["./src/training/pendulum_wrapper.h", [
        ["./src/training/pendulum_wrapper_empty.h", ["SOLUTION.*"], []],
        ["./src/training/pendulum_wrapper_solution.h", ["SOLUTION_.*"], ["SOLUTION.*"]],
        ["./src/training/pendulum_wrapper_strengthening.h", ["SOLUTION_PARALLEL"], ["SOLUTION(_STRENGTHENING)*"]],
        ["./src/training/pendulum_wrapper_parallel.h", [], ["SOLUTION_(PARALLEL|STRENGTHENING)", "SOLUTION"]],
    ]],
    ["./CMakeLists.txt", [
        ["./CMakeLists_empty.txt", ["SOLUTION.*"], []],
        ["./CMakeLists_inference.txt", [], ["SOLUTION_INFERENCE"]],
    ]],
    ["./src/training/main-training.cpp", [
        ["./src/training/main-training_empty.cpp", ["SOLUTION.*"], []],
        ["./src/training/main-training_parallel.cpp", [], ["SOLUTION_PARALLEL"]],
    ]],
    ["./params.json", [
        ["./params_empty.json", ["SOLUTION.*"], []],
        ["./params_strengthening.json", ["SOLUTION_PARALLEL"], ["SOLUTION_STRENGTHENING"]],
    ]],
]


def main():
    """Generate every derived file listed in ``files``.

    A missing input file raises ``OSError`` from ``open`` (it never returns a
    falsy object), so the failure is reported instead of silently skipped.
    Context managers guarantee that handles are closed even if filtering
    raises part-way through.
    """
    for inputFilePath, outputs in files:
        with open(inputFilePath, "r") as inputFile:
            for outputPath, patternsRemove, patternsKeep in outputs:
                with open(outputPath, "w") as outputFile:
                    if patternsRemove and patternsKeep:
                        # Remove first into an in-memory buffer, then keep
                        # from that buffer.
                        with io.StringIO() as temp:
                            filterSolution(inputFile, temp, False, patternsRemove)
                            filterSolution(temp, outputFile, True, patternsKeep)
                    elif patternsRemove:
                        filterSolution(inputFile, outputFile, False, patternsRemove)
                    elif patternsKeep:
                        filterSolution(inputFile, outputFile, True, patternsKeep)
                    else:
                        # No pattern provided: copy file as-is.
                        inputFile.seek(0)
                        outputFile.writelines(inputFile)


# Only generate files when executed as a script, so the filtering function
# can be imported without touching the file system.
if __name__ == "__main__":
    main()
Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t generation, double timeDelta) { +int Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t generation, double timeDelta, bool syncReset) { // Select the color for drawing. It is set to red here. SDL_SetRenderDrawColor(display.renderer, 255, 255, 255, 255); // Clear the entire screen to our selected color. @@ -161,6 +160,16 @@ int Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t ge sprintf(frameNumber, "frame: %4" PRId64, frame); Renderer::displayText(frameNumber, 0, 22); + // Print Sync status + char syncString[32]; + if (syncReset) { + sprintf(syncString, "[W] Training waits on display."); + } + else { + sprintf(syncString, ""); + } + Renderer::displayText(syncString, DISPLAY_W - 320, 0); + // Proceed to the actual display SDL_RenderPresent(display.renderer); @@ -176,6 +185,8 @@ int Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t ge // This is needed because repeated action are not grabbed at every frame // even when the key remains pressed. static int action = 0; + // Flag to ensure 'w' is only sent once per physical press + static bool w_consumed = false; SDL_Event event; // Grab all next events off the queue. @@ -205,12 +216,24 @@ int Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t ge case SDLK_l: action = 3; break; + case SDLK_w: + // only on initial keydown (no OS repeat) + if (event.key.repeat == 0) { + action = 4; + // mark as not yet consumed for this press + w_consumed = false; + } + break; } break; case SDL_QUIT: action = INT_MIN; break; case SDL_KEYUP: + // Reset per-key consumption state when released + if (event.key.keysym.sym == SDLK_w) { + w_consumed = false; + } action = 0; break; default: @@ -222,11 +245,31 @@ int Renderer::renderEnv(double state, double torque, uint64_t frame, uint64_t ge } } - return action; + // Ensure 'w' action is only returned once per physical press. 
+ int ret = action; + if (ret == 4) { + if (!w_consumed) { + // first time we return 4 for this press: mark consumed and clear persistent action + w_consumed = true; + action = 0; + } + else { + // already consumed: don't repeat + ret = 0; + } + } + + return ret; } void Renderer::replayThread(std::atomic& exit, std::atomic& doDisplay, std::atomic& generation, double& delta, std::deque>& replay) { + std::cout << "Pendulum training." << std::endl; + std::cout << "By default, the replay will be reset when a new generation is over." << std::endl << + "\t [W]: Toggle stalling the TPG training until the replay of previous generation is over." << std::endl << + "\t [Q]: Exit the simulator." << std::endl; + std::cout << std::endl << "Press [Enter] to start the training."; + // Init Display renderInit(); @@ -235,12 +278,13 @@ void Renderer::replayThread(std::atomic& exit, std::atomic& doDispla double angleDisplay = M_PI; double torqueDisplay = 0.0; uint64_t frame = 0; + bool waitForReplayEnd = false; std::deque> localReplay; while (!exit) { // Was a replay requested? 
- if (doDisplay) { + if (doDisplay && (!waitForReplayEnd || localReplay.empty())) { // copy the replay localReplay = replay; doDisplay = false; @@ -253,12 +297,15 @@ void Renderer::replayThread(std::atomic& exit, std::atomic& doDispla localReplay.pop_front(); } - int event = Renderer::renderEnv(angleDisplay, torqueDisplay, frame, generation, delta); + int event = Renderer::renderEnv(angleDisplay, torqueDisplay, frame, generation, delta, waitForReplayEnd); switch (event) { case INT_MIN: exit = true; doDisplay = false; break; + case 4: + waitForReplayEnd = !waitForReplayEnd; + break; case 0: default: // Nothing to do @@ -277,4 +324,4 @@ void Renderer::renderFinalize() SDL_DestroyTexture(display.textureArrow); SDL_DestroyRenderer(display.renderer); SDL_DestroyWindow(display.screen); -} +} \ No newline at end of file diff --git a/src/renderer.h b/src/renderer.h index 52c7926..0f4be42 100644 --- a/src/renderer.h +++ b/src/renderer.h @@ -49,13 +49,15 @@ namespace Renderer { * \param[in] torque the torque currently applied to the pendulum. * \param[in] frame the frame number to display in the top left corner. * \param[in] generation the generation number to display in the top left corner. + * \param[in] syncReset Flag to indicate whether the training is synced + * with the replay. * \return An int value is returned to the controller loop depending * on the action made by the user: * - [-3, 3]: 6 actions available to apply a torque to the pendulum. * - INT_MIN: Exit request. * \param[in] timeDelta time in second between two frames. */ - int renderEnv(double state, double torque, uint64_t frame, uint64_t generation, double timeDelta); + int renderEnv(double state, double torque, uint64_t frame, uint64_t generation, double timeDelta, bool syncReset); /** * \brief Separate control loop for displaying replays in parallel to training. 
diff --git a/src/training/main-training.cpp b/src/training/main-training.cpp index 3ae134c..ab693a8 100644 --- a/src/training/main-training.cpp +++ b/src/training/main-training.cpp @@ -16,7 +16,9 @@ #include "pendulum_wrapper.h" +#ifndef DEACTIVATE_DISPLAY #define DEACTIVATE_DISPLAY 0 +#endif /** @@ -48,7 +50,11 @@ void train_main(std::atomic& exitProgram, std::atomic& doDisplay, st #endif // Instantiate and initialize the Learning Agent (LA) + #ifdef SOLUTION_PARALLEL + Learn::ParallelLearningAgent la(pendulumLE, instructionSet, params); + #else // SOLUTION_PARALLEL Learn::LearningAgent la(pendulumLE, instructionSet, params); + #endif // SOLUTION_PARALLEL la.init(); // Basic logger for the training process @@ -87,15 +93,14 @@ int main(int argc, char** argv) { #if ( DEACTIVATE_DISPLAY == 0 ) // Start training in secondary thread std::thread threadTraining(train_main, std::ref(exitProgram), std::ref(doDisplay), std::ref(generation), std::ref(time_delta), std::ref(replay)); + // Replay code + Renderer::replayThread(exitProgram, doDisplay, generation, time_delta, replay); #else std::cout << "No display version, send interrupt signal to process to exit." 
<< std::endl; // Start training in main thread train_main(exitProgram, doDisplay, generation, time_delta, replay); #endif - // Replay code - Renderer::replayThread(exitProgram, doDisplay, generation, time_delta, replay); - #if ( DEACTIVATE_DISPLAY == 0 ) // Exit the display thread threadTraining.join(); diff --git a/src/training/pendulum_wrapper.cpp b/src/training/pendulum_wrapper.cpp index cd7e435..52c1693 100644 --- a/src/training/pendulum_wrapper.cpp +++ b/src/training/pendulum_wrapper.cpp @@ -2,7 +2,7 @@ #ifdef SOLUTION const std::vector PendulumWrapper::actions{ -1.0, -0.66, -0.33, 0.0, 0.33, 0.66, 1.0 }; -#else +#else // SOLUTION const std::vector PendulumWrapper::actions{ 0.0 }; #endif // SOLUTION @@ -12,12 +12,27 @@ PendulumWrapper::PendulumWrapper() : LearningEnvironment(actions.size()), pendul data.at(0).setPointer(&this->pendulum.getAngle()); data.at(1).setPointer(&this->pendulum.getVelocity()); } -#else +#else // SOLUTION PendulumWrapper::PendulumWrapper() : LearningEnvironment(actions.size()) { } #endif // SOLUTION +#ifdef SOLUTION_PARALLEL +PendulumWrapper::PendulumWrapper(const PendulumWrapper& other) : LearningEnvironment(other), pendulum(), data(other.data) +{ + // Set pointers of the copy to its own pendulum. 
+ data.at(0).setPointer(&this->pendulum.getAngle()); + data.at(1).setPointer(&this->pendulum.getVelocity()); +} +#endif // SOLUTION_PARALLEL + +#ifdef SOLUTION_PARALLEL +Learn::LearningEnvironment* PendulumWrapper::clone(void) const{ + return new PendulumWrapper(*this); +} +#endif // SOLUTION_PARALLEL + std::vector> PendulumWrapper::getDataSources() { #ifdef SOLUTION @@ -25,17 +40,36 @@ std::vector> PendulumWrapper::ge result.push_back(this->data.at(0)); result.push_back(this->data.at(1)); return result; -#else +#else // SOLUTION return std::vector>(); #endif // SOLUTION } void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber) { +#ifdef SOLUTION_STRENGTHENING + // In TRAINING mode, randomize the initial state + if (mode == Learn::LearningMode::TRAINING) { + // Seed the RNG differently for each iteration + this->rng.setSeed(seed + iterationNumber); + } + else { + // In VALIDATION and TESTING modes, use fixed seeds for reproducibility + this->rng.setSeed(iterationNumber); + } + + // Randomize the initial angle between [- pi, pi] + double initialAngle = this->rng.getDouble(-M_PI, M_PI); + this->pendulum.setAngle(initialAngle); + // Randomize the initial velocity between [-1.0, 1.0] + double initialVelocity = this->rng.getDouble(-1.0, 1.0); + this->pendulum.setVelocity(initialVelocity); +#else // SOLUTION_STRENGTHENING #ifdef SOLUTION this->pendulum.setAngle(M_PI); this->pendulum.setVelocity(0.0); #endif // SOLUTION +#endif // SOLUTION_STRENGTHENING #ifdef SOLUTION this->accumulatedReward = 0.0; #endif // SOLUTION @@ -68,7 +102,7 @@ double PendulumWrapper::getScore(void) const { #ifdef SOLUTION return accumulatedReward; -#else +#else // SOLUTION return 0.0; #endif // SOLUTION } @@ -77,3 +111,10 @@ bool PendulumWrapper::isTerminal(void) const { return false; } + +#ifdef SOLUTION_PARALLEL +bool PendulumWrapper::isCopyable(void) const +{ + return true; +} +#endif // SOLUTION_PARALLEL diff --git 
a/src/training/pendulum_wrapper.h b/src/training/pendulum_wrapper.h index 2c149f2..31fb594 100644 --- a/src/training/pendulum_wrapper.h +++ b/src/training/pendulum_wrapper.h @@ -41,9 +41,30 @@ class PendulumWrapper : public Learn::LearningEnvironment { double accumulatedReward; #endif // SOLUTION +#ifdef SOLUTION_STRENGTHENING + /// Random Number Generator for the environment + Mutator::RNG rng; +#endif // SOLUTION_STRENGTHENING + /// Default constructor for the PendulumWrapper PendulumWrapper(); +#ifdef SOLUTION_PARALLEL + /// Copy constructor for the PendulumWrapper + PendulumWrapper(const PendulumWrapper& other); +#endif // SOLUTION_PARALLEL + +#ifdef SOLUTION_PARALLEL + /** + * \brief Get a copy of the LearningEnvironment. + * + * This method should return a deep copy of the LearningEnvironment. + * + * \return a copy of the LearningEnvironment. + */ + virtual Learn::LearningEnvironment* clone(void) const override; +#endif // SOLUTION_PARALLEL + /** * \brief Get the data sources for this LearningEnvironment. * @@ -127,6 +148,17 @@ class PendulumWrapper : public Learn::LearningEnvironment { * \return a boolean indicating termination. */ virtual bool isTerminal(void) const override; + +#ifdef SOLUTION_PARALLEL + /** + * \brief Can the LearningEnvironment be copy constructed to evaluate + * several LearningAgent in parallel. + * + * \return true if the LearningEnvironment can be copied and run in + * parallel. + */ + virtual bool isCopyable() const override; +#endif // SOLUTION_PARALLEL }; #endif // !PENDULUM_WRAPPER_H