
Commit

revert
bassmang committed Nov 10, 2023
1 parent 4543386 commit 4045540
Showing 6 changed files with 110 additions and 99 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/vendor_build.yml
@@ -22,7 +22,7 @@ jobs:
build_type: ["Debug", "Release"]
compiler:
- { cc: "gcc", cxx: "g++"}
#- { cc: "clang", cxx: "clang++"}
- { cc: "clang", cxx: "clang++"}
runs-on: ${{matrix.os}}
steps:
- uses: actions/checkout@v3
@@ -56,8 +56,6 @@ jobs:
run: ctest --output-on-failure --no-tests=error --label-regex VWTestList --parallel 2
- name: Test
run: python3 test/run_tests.py -f --clean_dirty -E 0.001
env:
NUM_JOBS: 1
build_vendor_windows:
name: core-cli.${{ matrix.os }}.amd64.${{ matrix.build_type }}.msvc.standalone
runs-on: ${{matrix.os}}
85 changes: 84 additions & 1 deletion .vscode/settings.json
@@ -6,5 +6,88 @@
],
"url": "./test/vwtest.schema.json"
}
]
],
"files.associations": {
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"csignal": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"*.ipp": "cpp",
"any": "cpp",
"array": "cpp",
"atomic": "cpp",
"hash_map": "cpp",
"hash_set": "cpp",
"strstream": "cpp",
"bit": "cpp",
"*.tcc": "cpp",
"bitset": "cpp",
"cfenv": "cpp",
"charconv": "cpp",
"chrono": "cpp",
"cinttypes": "cpp",
"codecvt": "cpp",
"compare": "cpp",
"complex": "cpp",
"concepts": "cpp",
"condition_variable": "cpp",
"cstdint": "cpp",
"deque": "cpp",
"forward_list": "cpp",
"list": "cpp",
"map": "cpp",
"set": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"unordered_set": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"regex": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"fstream": "cpp",
"future": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"mutex": "cpp",
"new": "cpp",
"numbers": "cpp",
"ostream": "cpp",
"ranges": "cpp",
"semaphore": "cpp",
"span": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"stop_token": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"typeindex": "cpp",
"typeinfo": "cpp",
"valarray": "cpp",
"variant": "cpp"
}
}
4 changes: 2 additions & 2 deletions test/run_tests.py
@@ -1027,7 +1027,7 @@ def main():
"-j",
"--jobs",
type=int,
default=1,
default=os.cpu_count(),
help="Number of tests to run in parallel. Default is current machine core count.",
)
parser.add_argument(
@@ -1234,7 +1234,7 @@ def main():
tasks: List[Future[TestOutcome]] = []
completed_tests = Completion()

executor = ThreadPoolExecutor(max_workers=1)
executor = ThreadPoolExecutor(max_workers=args.jobs)

for test in tests:
tasks.append(
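
The two hunks above restore parallel test execution: the --jobs flag defaults to the machine core count again, and that value is passed to the thread pool instead of a hard-coded 1. A minimal, self-contained Python sketch of the same pattern follows; the run_test body and the test count are placeholders for illustration, not part of run_tests.py.

import argparse
import os
from concurrent.futures import Future, ThreadPoolExecutor
from typing import List

parser = argparse.ArgumentParser()
parser.add_argument(
    "-j",
    "--jobs",
    type=int,
    default=os.cpu_count(),
    help="Number of tests to run in parallel. Default is current machine core count.",
)
args = parser.parse_args()

def run_test(test_id: int) -> str:
    # Placeholder for the real per-test work done in run_tests.py.
    return f"test {test_id} passed"

# One worker per requested job; submitting everything up front mirrors the
# tasks/Future bookkeeping visible in the diff.
executor = ThreadPoolExecutor(max_workers=args.jobs)
tasks: List[Future] = [executor.submit(run_test, i) for i in range(8)]
for task in tasks:
    print(task.result())
executor.shutdown()
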
16 changes: 1 addition & 15 deletions vowpalwabbit/core/include/vw/core/gd_predict.h
@@ -7,7 +7,6 @@
#include "vw/core/example_predict.h"
#include "vw/core/interactions_predict.h"
#include "vw/core/v_array.h"
#include <iostream>

#undef VW_DEBUG_LOG
#define VW_DEBUG_LOG vw_dbg::GD_PREDICT
@@ -38,15 +37,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& data
for (const auto& f : fs)
{
VW::weight& w = weights[(f.index() + offset)];
//std::cout << "Upd Index: " << f.index() << "\n";
//std::cout << "Upd Value: " << f.value() << "\n";
//std::cout << "Upd Mult: " << mult << "\n";
FuncT(dat, mult * f.value(), w);
//VW::weight* w_ptr = &w;
//std::cout << "Upd w[0]: " << w_ptr[0] << "\n";
//std::cout << "Upd w[1]: " << w_ptr[1] << "\n";
//std::cout << "Upd w[2]: " << w_ptr[2] << "\n";
//std::cout << "Upd w[3]: " << w_ptr[3] << "\n";
}
}

@@ -55,12 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float), class WeightsT>
inline void foreach_feature(
const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
{
for (const auto& f : fs)
{
//std::cout << "Pred Index: " << f.index() << "\n";
//std::cout << "Pred Weight: " << weights[static_cast<size_t>(f.index() + offset)] << "\n";
FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]);
}
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
}

template <class DataT, class WeightOrIndexT, void (*FuncT)(DataT&, float, WeightOrIndexT),
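
For readers skimming the diff: the foreach_feature overloads above walk one feature group and hand each feature's scaled value plus the matching weight to a callback, and the revert only strips the debug prints. A rough Python rendering of that loop is below; it is illustrative only, since the real code is templated C++ operating on VW's weight arrays.

def foreach_feature(weights, features, dat, func, offset=0, mult=1.0):
    # Mirrors the C++ loop: look up the weight for each feature index (plus the
    # model offset) and invoke the callback with the scaled feature value.
    for index, value in features:
        func(dat, mult * value, weights[index + offset])

# Example callback: accumulate a dot product into dat["prediction"].
def add_to_prediction(dat, feature_value, weight):
    dat["prediction"] += feature_value * weight

dat = {"prediction": 0.0}
foreach_feature({0: 0.5, 3: -1.25}, [(0, 1.0), (3, 2.0)], dat, add_to_prediction)
print(dat["prediction"])  # 0.5 * 1.0 + (-1.25) * 2.0 = -2.0
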
29 changes: 10 additions & 19 deletions vowpalwabbit/core/src/loss_functions.cc
@@ -13,7 +13,6 @@
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <iostream> // Make sure to include this at the top of your file

namespace
{
@@ -43,24 +42,16 @@ inline float squared_loss_impl_get_loss(const VW::shared_data* sd, float predict

inline float squared_loss_impl_get_update(float prediction, float label, float update_scale, float pred_per_update)
{
//std::cout << "Prediction: " << prediction << ", Label: " << label << ", Update Scale: " << update_scale
// << ", Pred Per Update: " << pred_per_update << "\n";

if (update_scale * pred_per_update < 1e-6)
{
// std::cout << "Entering first branch (update_scale * pred_per_update < 1e-6)" << "\n";
float update = 2.f * (label - prediction) * update_scale;
// std::cout << "Update (first branch): " << update << "\n";
return update;
}

// std::cout << "Entering second branch" << "\n";
float exp_component = VW::details::correctedExp(-2.f * update_scale * pred_per_update);
// std::cout << "Exp Component: " << exp_component << "\n";
float update = (label - prediction) * (1.f - exp_component) / pred_per_update;
// std::cout << "Update (second branch): " << update << "\n";

return update;
if (update_scale * pred_per_update < 1e-6)
{
/* When exp(-eta_t)~= 1 we replace 1-exp(-eta_t)
* with its first order Taylor expansion around 0
* to avoid catastrophic cancellation.
*/
return 2.f * (label - prediction) * update_scale;
}
return (label - prediction) * (1.f - VW::details::correctedExp(-2.f * update_scale * pred_per_update)) /
pred_per_update;
}

inline float squared_loss_impl_get_unsafe_update(float prediction, float label, float update_scale)
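
The restored comment explains the guard: when update_scale * pred_per_update is tiny, 1 - exp(-2 * eta) is the difference of two nearly equal numbers and loses precision, so the code substitutes the first-order Taylor expansion 1 - exp(-x) ~= x, which reduces the update to 2 * (label - prediction) * update_scale. A small Python transcription of the restored function, for illustration only (math.exp stands in for VW::details::correctedExp):

import math

def squared_loss_get_update(prediction, label, update_scale, pred_per_update):
    eta = update_scale * pred_per_update
    if eta < 1e-6:
        # First-order Taylor expansion of 1 - exp(-2 * eta) around 0 to avoid
        # catastrophic cancellation; the update collapses to
        # 2 * (label - prediction) * update_scale.
        return 2.0 * (label - prediction) * update_scale
    return (label - prediction) * (1.0 - math.exp(-2.0 * eta)) / pred_per_update

print(squared_loss_get_update(prediction=0.2, label=1.0, update_scale=1e-8, pred_per_update=1.0))
print(squared_loss_get_update(prediction=0.2, label=1.0, update_scale=0.5, pred_per_update=1.0))
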
71 changes: 12 additions & 59 deletions vowpalwabbit/core/src/reductions/gd.cc
@@ -16,8 +16,6 @@

#include <algorithm>
#include <cfloat>
#include <iostream>
#include <iomanip>

#if !defined(VW_NO_INLINE_SIMD)
# if !defined(__SSE2__) && (defined(_M_AMD64) || defined(_M_X64))
@@ -166,19 +164,15 @@ static inline float inv_sqrt(float x)
// N-R iteration 2
float32x2_t e3 = vmul_f32(e2, vrsqrts_f32(v1, vmul_f32(e2, e2)));
// Extract result
std::cout << "__ARM_NEON__" << "\n";
return vget_lane_f32(e3, 0);
# elif defined(__SSE2__)
__m128 eta = _mm_load_ss(&x);
eta = _mm_rsqrt_ss(eta);
_mm_store_ss(&x, eta);
std::cout << "__SSE2__" << "\n";
# else
std::cout << "None" << "\n";
x = quake_inv_sqrt(x);
# endif
#else
std::cout << "VW_NO_INLINE_SIMD" << "\n";
x = quake_inv_sqrt(x);
#endif

@@ -194,12 +188,7 @@ inline void update_feature(float& update, float x, float& fw)
bool modify = x < FLT_MAX && x > -FLT_MAX && (feature_mask_off || fw != 0.);
if (modify)
{
if VW_STD17_CONSTEXPR (spare != 0)
{
//std::cout << "Upd spare: " << w[spare] << "\n";
x *= w[spare];
}
//std::cout << "Upd update: " << update << "\n";
if VW_STD17_CONSTEXPR (spare != 0) { x *= w[spare]; }
w[0] += update * x;
}
}
@@ -630,42 +619,22 @@ class power_data
template <bool sqrt_rate, size_t adaptive, size_t normalized>
inline float compute_rate_decay(power_data& s, float& fw)
{
std::cout << std::fixed << std::setprecision(10); // Set high precision for floating-point output

VW::weight* w = &fw;
//std::cout << "Input fw: " << fw << std::endl;

float rate_decay = 1.f;
if (adaptive)
{
if (sqrt_rate)
{
rate_decay = inv_sqrt(w[adaptive]);
//std::cout << "Rate decay after inv_sqrt: " << rate_decay << " (inv_sqrt of " << w[adaptive] << ")" << std::endl;
}
else
{
rate_decay = powf(w[adaptive], s.minus_power_t);
//std::cout << "Rate decay after powf (adaptive): " << rate_decay << " (powf of " << w[adaptive] << " ^ " << s.minus_power_t << ")" << std::endl;
}
if (sqrt_rate) { rate_decay = inv_sqrt(w[adaptive]); }
else { rate_decay = powf(w[adaptive], s.minus_power_t); }
}
if VW_STD17_CONSTEXPR (normalized != 0)
{
if (sqrt_rate)
{
float inv_norm = 1.f / w[normalized];
//std::cout << "Intermediate inv_norm: " << inv_norm << std::endl;

if (adaptive) { rate_decay *= inv_norm; }
else { rate_decay *= inv_norm * inv_norm; }

//std::cout << "Rate decay after normalization (sqrt_rate): " << rate_decay << std::endl;
}
else
{
rate_decay *= powf(w[normalized] * w[normalized], s.neg_norm_power);
//std::cout << "Rate decay after powf (normalized): " << rate_decay << " (powf of " << w[normalized] << " * " << w[normalized] << " ^ " << s.neg_norm_power << ")" << std::endl;
}
else { rate_decay *= powf(w[normalized] * w[normalized], s.neg_norm_power); }
}
return rate_decay;
}
@@ -745,53 +714,39 @@ template <bool sqrt_rate, bool feature_mask_off, bool adax, size_t adaptive, siz
bool stateless>
float get_pred_per_update(VW::reductions::gd& g, VW::example& ec)
{
std::cout << std::fixed << std::setprecision(10); // Set high precision for floating-point output

// We must traverse the features in _precisely_ the same order as during training.
auto& ld = ec.l.simple;
VW::workspace& all = *g.all;

float grad_squared = ec.weight;
if (!adax) {
grad_squared *= all.loss_config.loss->get_square_grad(ec.pred.scalar, ld.label);
//std::cout << "Grad Squared: " << grad_squared << std::endl;
}
if (!adax) { grad_squared *= all.loss_config.loss->get_square_grad(ec.pred.scalar, ld.label); }

if (grad_squared == 0 && !stateless) {
std::cout << "Returning early due to grad_squared == 0 and stateless == false" << std::endl;
return 1.;
}
if (grad_squared == 0 && !stateless) { return 1.; }

norm_data nd = {grad_squared, 0., 0., {g.neg_power_t, g.neg_norm_power}, {0}, &g.all->logger};
// Print values in nd here if needed

VW::foreach_feature<norm_data, pred_per_update_feature<sqrt_rate, feature_mask_off, adaptive, normalized, spare, stateless>>(all, ec, nd);
// Add prints inside VW::foreach_feature if possible to check individual feature contributions

VW::foreach_feature<norm_data,
pred_per_update_feature<sqrt_rate, feature_mask_off, adaptive, normalized, spare, stateless>>(all, ec, nd);
if VW_STD17_CONSTEXPR (normalized != 0)
{
if (!stateless)
{
g.current_model_state->normalized_sum_norm_x += (static_cast<double>(ec.weight)) * nd.norm_x;
g.current_model_state->total_weight += ec.weight;
g.update_multiplier = average_update<sqrt_rate, adaptive, normalized>(
static_cast<float>(g.current_model_state->total_weight),
static_cast<float>(g.current_model_state->normalized_sum_norm_x), g.neg_norm_power);
g.update_multiplier =
average_update<sqrt_rate, adaptive, normalized>(static_cast<float>(g.current_model_state->total_weight),
static_cast<float>(g.current_model_state->normalized_sum_norm_x), g.neg_norm_power);
}
else
{
float nsnx = (static_cast<float>(g.current_model_state->normalized_sum_norm_x)) + ec.weight * nd.norm_x;
float tw = static_cast<float>(g.current_model_state->total_weight) + ec.weight;
g.update_multiplier = average_update<sqrt_rate, adaptive, normalized>(tw, nsnx, g.neg_norm_power);
}
//std::cout << "Update Multiplier: " << g.update_multiplier << std::endl;
nd.pred_per_update *= g.update_multiplier;
}

//std::cout << "Pred Per Update: " << nd.pred_per_update << std::endl;
return nd.pred_per_update;
}


template <bool sqrt_rate, bool feature_mask_off, bool adax, size_t adaptive, size_t normalized, size_t spare,
bool stateless>
float sensitivity(VW::reductions::gd& g, VW::example& ec)
@@ -889,8 +844,6 @@ void update(VW::reductions::gd& g, VW::example& ec)
if ((update = compute_update<sparse_l2, invariant, sqrt_rate, feature_mask_off, adax, adaptive, normalized, spare>(
g, ec)) != 0.)
{
//std::cout << "Mult: " << g.update_multiplier << "\n";
//std::cout << "Update: " << update << "\n";
train<sqrt_rate, feature_mask_off, adaptive, normalized, spare>(g, ec, update);
}

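
Among the cleaned-up functions in gd.cc, compute_rate_decay combines the adaptive (AdaGrad-style) and normalized pieces of the per-feature learning rate. A rough Python paraphrase of the logic visible in the diff follows; names follow the C++, and the indexing convention (w[adaptive] holding the accumulated squared gradient, w[normalized] the normalization state) is taken from the surrounding code, not from any Python API.

import math

def compute_rate_decay(w, adaptive, normalized, sqrt_rate, minus_power_t, neg_norm_power):
    # w is one feature's weight block: w[0] is the weight itself, w[adaptive] and
    # w[normalized] are the extra state slots when those modes are enabled.
    rate_decay = 1.0
    if adaptive:
        if sqrt_rate:
            rate_decay = 1.0 / math.sqrt(w[adaptive])
        else:
            rate_decay = w[adaptive] ** minus_power_t
    if normalized:
        if sqrt_rate:
            inv_norm = 1.0 / w[normalized]
            rate_decay *= inv_norm if adaptive else inv_norm * inv_norm
        else:
            rate_decay *= (w[normalized] * w[normalized]) ** neg_norm_power
    return rate_decay

# Example: adaptive slot at index 1, normalized slot at index 2, sqrt-rate mode.
print(compute_rate_decay([0.3, 4.0, 2.0], adaptive=1, normalized=2,
                         sqrt_rate=True, minus_power_t=-0.5, neg_norm_power=-1.0))
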
