-
-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AnyParameterProperties refactor #4412
Changes from 6 commits
0a8628d
c505c2a
3aba095
476d2a9
3e32f1c
dfb5aea
2bf79c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,14 @@ | ||
/* | ||
* This software is distributed under BSD 3-clause license (see LICENSE file). | ||
* | ||
* Authors: Heiko Strathmann, Gil Hoben | ||
*/ | ||
|
||
#ifndef __ANYPARAMETER_H__ | ||
#define __ANYPARAMETER_H__ | ||
|
||
#include <shogun/lib/any.h> | ||
#include <shogun/lib/bitmask_operators.h> | ||
|
||
#include <string> | ||
|
||
|
@@ -22,48 +29,96 @@ namespace shogun | |
GRADIENT_AVAILABLE = 1 | ||
}; | ||
|
||
/** parameter properties */ | ||
enum class ParameterProperties | ||
{ | ||
HYPER = 1u << 0, | ||
GRADIENT = 1u << 1, | ||
MODEL = 1u << 2 | ||
}; | ||
|
||
enableEnumClassBitmask(ParameterProperties); | ||
|
||
/** @brief Class AnyParameterProperties keeps track of of parameter meta | ||
* information, such as properties and descriptions The parameter properties | ||
* can be either true or false. These properties describe if a parameter is | ||
* for example a hyperparameter or if it has a gradient. | ||
*/ | ||
class AnyParameterProperties | ||
{ | ||
public: | ||
static const int32_t HYPER = 1u << 0; | ||
static const int32_t GRADIENT = 1u << 1; | ||
static const int32_t MODEL = 1u << 2; | ||
|
||
/** Default constructor where all parameter properties are false | ||
*/ | ||
AnyParameterProperties() | ||
: m_description(), m_model_selection(MS_NOT_AVAILABLE), | ||
m_gradient(GRADIENT_NOT_AVAILABLE) | ||
: m_description("No description given"), m_attribute_mask(0) | ||
{ | ||
} | ||
/** Constructor | ||
* @param description parameter description | ||
* @param hyperparameter set to true for parameters that determine | ||
* how training is performed, e.g. regularisation parameters | ||
* @param gradient set to true for parameters required for gradient | ||
* updates | ||
* @param model set to true for parameters used in inference, e.g. | ||
* weights and bias | ||
* */ | ||
AnyParameterProperties( | ||
std::string description, | ||
EModelSelectionAvailability model_selection = MS_NOT_AVAILABLE, | ||
EGradientAvailability gradient = GRADIENT_NOT_AVAILABLE) | ||
: m_description(description), m_model_selection(model_selection), | ||
EModelSelectionAvailability hyperparameter = MS_NOT_AVAILABLE, | ||
EGradientAvailability gradient = GRADIENT_NOT_AVAILABLE, | ||
bool model = false) | ||
: m_description(description), m_model_selection(hyperparameter), | ||
m_gradient(gradient) | ||
{ | ||
m_attribute_mask = (hyperparameter << 0 & HYPER) | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was more thinking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh that's too low-level. Can we introduce a constexpr function for that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah actually good point! :) |
||
(gradient << 1 & GRADIENT) | | ||
(model << 2 & MODEL); | ||
} | ||
/** Mask constructor | ||
* @param description parameter description | ||
* @param attribute_mask mask encoding parameter properties | ||
* */ | ||
AnyParameterProperties(std::string description, int32_t attribute_mask) | ||
: m_description(description) | ||
{ | ||
m_attribute_mask = attribute_mask; | ||
} | ||
/** Copy contructor */ | ||
AnyParameterProperties(const AnyParameterProperties& other) | ||
: m_description(other.m_description), | ||
m_model_selection(other.m_model_selection), | ||
m_gradient(other.m_gradient) | ||
m_gradient(other.m_gradient), | ||
m_attribute_mask(other.m_attribute_mask) | ||
{ | ||
} | ||
|
||
std::string get_description() const | ||
const std::string& get_description() const | ||
{ | ||
return m_description; | ||
} | ||
|
||
EModelSelectionAvailability get_model_selection() const | ||
{ | ||
return m_model_selection; | ||
return static_cast<EModelSelectionAvailability>( | ||
(m_attribute_mask & HYPER) > 0); | ||
karlnapf marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
EGradientAvailability get_gradient() const | ||
{ | ||
return m_gradient; | ||
return static_cast<EGradientAvailability>( | ||
(m_attribute_mask & GRADIENT) > 0); | ||
} | ||
bool get_model() const | ||
{ | ||
return static_cast<bool>(m_attribute_mask & MODEL); | ||
} | ||
|
||
private: | ||
std::string m_description; | ||
EModelSelectionAvailability m_model_selection; | ||
EGradientAvailability m_gradient; | ||
int32_t m_attribute_mask; | ||
}; | ||
|
||
class AnyParameter | ||
|
@@ -116,6 +171,6 @@ namespace shogun | |
Any m_value; | ||
AnyParameterProperties m_properties; | ||
}; | ||
} | ||
} // namespace shogun | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#ifndef JSS_BITMASK_HPP | ||
#define JSS_BITMASK_HPP | ||
|
||
// (C) Copyright 2015 Just Software Solutions Ltd | ||
// | ||
// Distributed under the Boost Software License, Version 1.0. | ||
// | ||
// Boost Software License - Version 1.0 - August 17th, 2003 | ||
// | ||
// Permission is hereby granted, free of charge, to any person or | ||
// organization obtaining a copy of the software and accompanying | ||
// documentation covered by this license (the "Software") to use, | ||
// reproduce, display, distribute, execute, and transmit the | ||
// Software, and to prepare derivative works of the Software, and | ||
// to permit third-parties to whom the Software is furnished to | ||
// do so, all subject to the following: | ||
// | ||
// The copyright notices in the Software and this entire | ||
// statement, including the above license grant, this restriction | ||
// and the following disclaimer, must be included in all copies | ||
// of the Software, in whole or in part, and all derivative works | ||
// of the Software, unless such copies or derivative works are | ||
// solely in the form of machine-executable object code generated | ||
// by a source language processor. | ||
// | ||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
// KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | ||
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
// PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE | ||
// COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE | ||
// LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN | ||
// CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
// THE SOFTWARE. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could put "modified by G.F" here if you want There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah ok! I wasn't sure how it works with the boost software license! |
||
|
||
#include<type_traits> | ||
|
||
namespace shogun { | ||
|
||
template<typename E> | ||
struct enable_bitmask_operators { | ||
static constexpr bool enable = false; | ||
}; | ||
|
||
#define enableEnumClassBitmask(T) template<> \ | ||
struct enable_bitmask_operators<T> \ | ||
{ \ | ||
static constexpr bool enable = true; \ | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E>::type | ||
operator|(E lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
return static_cast<E>( | ||
static_cast<underlying>(lhs) | static_cast<underlying>(rhs)); | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E>::type | ||
operator&(E lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
return static_cast<E>( | ||
static_cast<underlying>(lhs) & static_cast<underlying>(rhs)); | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E>::type | ||
operator^(E lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
return static_cast<E>( | ||
static_cast<underlying>(lhs) ^ static_cast<underlying>(rhs)); | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E>::type | ||
operator~(E lhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
return static_cast<E>( | ||
~static_cast<underlying>(lhs)); | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E &>::type | ||
operator|=(E &lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
lhs = static_cast<E>( | ||
static_cast<underlying>(lhs) | static_cast<underlying>(rhs)); | ||
return lhs; | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E &>::type | ||
operator&=(E &lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
lhs = static_cast<E>( | ||
static_cast<underlying>(lhs) & static_cast<underlying>(rhs)); | ||
return lhs; | ||
} | ||
|
||
template<typename E> | ||
typename std::enable_if<enable_bitmask_operators<E>::enable, E &>::type | ||
operator^=(E &lhs, E rhs) { | ||
typedef typename std::underlying_type<E>::type underlying; | ||
lhs = static_cast<E>( | ||
static_cast<underlying>(lhs) ^ static_cast<underlying>(rhs)); | ||
return lhs; | ||
} | ||
} | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah wait. the thing I had in mind was
and then
This is now difference, as you are effectively storing masks themselves in these const members, not the number of bit shifts. Or am I wrong? Let me stare a bit more to understand
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, but start at 1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, if we have:
Then the above mask will always be false/0 no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean more like this:
which is the equivalent of: shift bit at position X (1,2 or 3 in this case) if the function parameter is true.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah now I remember why I did it this way.. Basically if you want to provide an interface to create your own mask like this
you need to have the values of the static member with the shift. Unless I am missing something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what you mean, but what I have seen (and been doing) would result in
It is not as neat, you are totally right!. BUT it can be hidden quite easily either.
I mean, it is your code, Ill leave it to you. Not sure whether it is possible to say what is better now, we will have to commit to something and then see if it works :) So go ahead with the above solution and then we see where we get
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK! Btw why does it need to start at 1, instead of 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah, no that was nonsense (just needed to make sure that you never get an all zero mask)
BTW I also realised why the micro-controller people store the number of bits rather than the full masks: it uses less memory (which is not really relevant here, but still good to think about it for a moment).
so with an 8bit integer, you can do 255 attributes if you generate the masks on the fly a la
(1 << HYPER)
. If you wanted to store the mask for each attribute instead (i.e.HYPER = 0x00000010
(each mask is 8bit), you would have to store 255 * 8bit, which is quite a difference , especially if you have only 4k memory as on an AVR ;)How many parameters are registered during a shogun run? Let's say 100 is realistic. Then we have 100 byte usage if we store only the offset, and 2.5kB if we use the masks. In a data heavy environment, and since we won't register many many more parameters this doesn't matter. For 1000 parameters it is still ok, 10^6 will result in some pretty big differences.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lets typedef the actual type so we can change later.