-
-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AnyParameterProperties refactor #4412
Changes from 2 commits
0a8628d
c505c2a
3aba095
476d2a9
3e32f1c
dfb5aea
2bf79c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,101 +28,85 @@ namespace shogun | |
GRADIENT_AVAILABLE = 1 | ||
}; | ||
/** @brief Class AnyParameterProperties keeps track of parameter properties. | ||
* | ||
* Currently there are three types of parameters: | ||
* -# HYPERPARAMETER: the parameter determines how the training is | ||
* performed, e.g. regularisation | ||
* -# GRADIENT_PARAM: the parameter is used in gradient updates | ||
* -# MODEL_PARAM: model parameter that is trained, e.g. weight and bias | ||
* vectors and bias | ||
* The parameter properties can be either true or false. | ||
* These properties describe if a parameter is for example a hyperparameter | ||
* or if it has a gradient. | ||
*/ | ||
class AnyParameterProperties | ||
{ | ||
public: | ||
static const int32_t HYPERPARAMETER = 0x00000001; | ||
static const int32_t GRADIENT_PARAM = 0x00000010; | ||
static const int32_t MODEL_PARAM = 0x00000100; | ||
static const int32_t HYPER = 1u << 0; | ||
static const int32_t GRADIENT = 1u << 1; | ||
static const int32_t MODEL = 1u << 2; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah wait. the thing I had in mind was
and then
This is now difference, as you are effectively storing masks themselves in these const members, not the number of bit shifts. Or am I wrong? Let me stare a bit more to understand There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, but start at 1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, if we have:
Then the above mask will always be false/0 no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean more like this:
which is the equivalent of: shift bit at position X (1,2 or 3 in this case) if the function parameter is true. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah now I remember why I did it this way.. Basically if you want to provide an interface to create your own mask like this
you need to have the values of the static member with the shift. Unless I am missing something? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure what you mean, but what I have seen (and been doing) would result in
It is not as neat, you are totally right!. BUT it can be hidden quite easily either. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK! Btw why does it need to start at 1, instead of 0? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, no that was nonsense (just needed to make sure that you never get an all zero mask) BTW I also realised why the micro-controller people store the number of bits rather than the full masks: it uses less memory (which is not really relevant here, but still good to think about it for a moment).
so with an 8bit integer, you can do 255 attributes if you generate the masks on the fly a la How many parameters are registered during a shogun run? Let's say 100 is realistic. Then we have 100 byte usage if we store only the offset, and 2.5kB if we use the masks. In a data heavy environment, and since we won't register many many more parameters this doesn't matter. For 1000 parameters it is still ok, 10^6 will result in some pretty big differences. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lets typedef the actual type so we can change later. |
||
|
||
/** default constructor where all parameter types are switched to false | ||
/** Default constructor where all parameter properties are false | ||
*/ | ||
AnyParameterProperties() : m_description(), m_mask_attribute(0x00000000) | ||
{ | ||
} | ||
/** legacy constructor */ | ||
AnyParameterProperties() | ||
: m_description("No description given"), m_attribute_mask(0) | ||
{ | ||
} | ||
/** Constructor | ||
* @param description parameter description | ||
* @param hyperparameter set to true for parameters that determine | ||
* how training is performed, e.g. regularisation parameters | ||
* @param gradient set to true for parameters required for gradient | ||
* updates | ||
* @param model set to true for parameters used in inference, e.g. | ||
* weights and bias | ||
* */ | ||
AnyParameterProperties( | ||
std::string description, | ||
EModelSelectionAvailability model_selection = MS_NOT_AVAILABLE, | ||
EModelSelectionAvailability hyperparameter = MS_NOT_AVAILABLE, | ||
EGradientAvailability gradient = GRADIENT_NOT_AVAILABLE, | ||
bool hyperparameter = false) | ||
: m_description(description), m_model_selection(model_selection), | ||
bool model = false) | ||
: m_description(description), m_model_selection(hyperparameter), | ||
m_gradient(gradient) | ||
{ | ||
m_mask_attribute = 0x00000000; | ||
if (model_selection) | ||
m_mask_attribute |= MODEL_PARAM; | ||
if (gradient) | ||
m_mask_attribute |= GRADIENT_PARAM; | ||
if (hyperparameter) | ||
m_mask_attribute |= HYPERPARAMETER; | ||
m_attribute_mask = (hyperparameter << 0 & HYPER) | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was more thinking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh that's too low-level. Can we introduce a constexpr function for that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah actually good point! :) |
||
(gradient << 1 & GRADIENT) | | ||
(model << 2 & MODEL); | ||
} | ||
/** mask constructor */ | ||
AnyParameterProperties(std::string description, int32_t mask_attribute) | ||
/** Mask constructor | ||
* @param description parameter description | ||
* @param attribute_mask mask encoding parameter properties | ||
* */ | ||
AnyParameterProperties(std::string description, int32_t attribute_mask) | ||
: m_description(description) | ||
{ | ||
m_mask_attribute = mask_attribute; | ||
m_attribute_mask = attribute_mask; | ||
} | ||
/** copy contructor */ | ||
/** Copy contructor */ | ||
AnyParameterProperties(const AnyParameterProperties& other) | ||
: m_description(other.m_description), | ||
m_model_selection(other.m_model_selection), | ||
m_gradient(other.m_gradient), | ||
m_mask_attribute(other.m_mask_attribute) | ||
m_attribute_mask(other.m_attribute_mask) | ||
{ | ||
} | ||
/** description getter */ | ||
std::string get_description() const | ||
const std::string& get_description() const | ||
{ | ||
return m_description; | ||
} | ||
/** model selection flag getter */ | ||
EModelSelectionAvailability get_model_selection() const | ||
{ | ||
EModelSelectionAvailability return_val; | ||
if (m_mask_attribute & HYPERPARAMETER) | ||
return_val = EModelSelectionAvailability::MS_AVAILABLE; | ||
else | ||
return_val = EModelSelectionAvailability::MS_NOT_AVAILABLE; | ||
return return_val; | ||
return static_cast<EModelSelectionAvailability>( | ||
(m_attribute_mask & HYPER) > 0); | ||
karlnapf marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/** gradient flag getter */ | ||
EGradientAvailability get_gradient() const | ||
{ | ||
EGradientAvailability return_val; | ||
if (m_mask_attribute & GRADIENT_PARAM) | ||
return_val = EGradientAvailability::GRADIENT_AVAILABLE; | ||
else | ||
return_val = EGradientAvailability::GRADIENT_NOT_AVAILABLE; | ||
return return_val; | ||
return static_cast<EGradientAvailability>( | ||
(m_attribute_mask & GRADIENT) > 0); | ||
} | ||
|
||
/** hyperparameter flag getter */ | ||
bool get_hyperparameter() const | ||
bool get_model() const | ||
{ | ||
bool return_val; | ||
if (m_mask_attribute & HYPERPARAMETER) | ||
return_val = true; | ||
else | ||
return_val = false; | ||
return return_val; | ||
return static_cast<bool>(m_attribute_mask & MODEL); | ||
} | ||
|
||
private: | ||
std::string m_description; | ||
EModelSelectionAvailability m_model_selection; | ||
EGradientAvailability m_gradient; | ||
/** mask to store all param flags*/ | ||
int32_t m_mask_attribute; | ||
int32_t m_attribute_mask; | ||
}; | ||
|
||
class AnyParameter | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
like!
Maybe also mention descriptions in the first sentence " ... of parameter meta information, such as properties and descriptions"