|
6 | 6 | #include <algorithm> |
7 | 7 | #include <climits> |
8 | 8 | #include <variant> |
9 | | -#include <iostream> |
10 | | -#include <iomanip> |
11 | 9 |
|
12 | 10 | #include "intel_gpu/graph/network.hpp" |
13 | 11 | #include "intel_gpu/primitives/input_layout.hpp" |
@@ -280,105 +278,23 @@ struct weightless_cache_manager { |
280 | 278 | std::make_shared<ov::op::v0::Constant>(original_dtype, shape, get_intermediate_data(), shared_buf); |
281 | 279 | } |
282 | 280 |
|
283 | | - // Debug: Compare ConvertPrecision vs Convert+ConstantFolding for 4-bit types |
284 | | - bool is_4bit_conversion = (original_dtype == ov::element::u4 || original_dtype == ov::element::i4) && |
285 | | - (curr_dtype == ov::element::u8 || curr_dtype == ov::element::i8); |
286 | | - |
287 | | - if (is_4bit_conversion) { |
288 | | - std::cerr << "[4-bit Debug] Processing 4-bit conversion: " |
289 | | - << original_dtype << " -> " << curr_dtype |
290 | | - << ", shape: " << shape << std::endl; |
291 | | - // Method 1: Using ConvertPrecision transformation |
292 | | - ov::ParameterVector inputParams1; |
293 | | - ov::ResultVector results1; |
294 | | - results1.push_back(std::make_shared<ov::op::v0::Result>(orig_constant->output(0))); |
295 | | - auto model1 = std::make_shared<ov::Model>(results1, inputParams1, "convert_precision_model"); |
296 | | - |
297 | | - ov::pass::Manager manager1("ConvertPrecision"); |
298 | | - manager1.register_pass<ov::pass::ConvertPrecision>(original_dtype, curr_dtype); |
299 | | - manager1.run_passes(model1); |
300 | | - |
301 | | - const auto& ops1 = model1->get_ops(); |
302 | | - auto it1 = std::find_if(ops1.begin(), ops1.end(), [](const std::shared_ptr<ov::Node>& node) { |
303 | | - return ov::op::util::is_constant(node); |
304 | | - }); |
305 | | - OPENVINO_ASSERT(it1 != ops1.end()); |
306 | | - auto constant1 = ov::as_type_ptr<ov::op::v0::Constant>(*it1); |
307 | | - |
308 | | - // Method 2: Using Convert + ConstantFolding |
309 | | - ov::ParameterVector inputParams2; |
310 | | - ov::ResultVector results2; |
311 | | - auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype); |
312 | | - results2.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0))); |
313 | | - auto model2 = std::make_shared<ov::Model>(results2, inputParams2, "convert_folding_model"); |
314 | | - |
315 | | - ov::pass::Manager manager2("Convert+ConstantFolding"); |
316 | | - manager2.register_pass<ov::pass::ConstantFolding>(); |
317 | | - manager2.run_passes(model2); |
318 | | - |
319 | | - const auto& ops2 = model2->get_ops(); |
320 | | - auto it2 = std::find_if(ops2.begin(), ops2.end(), [](const std::shared_ptr<ov::Node>& node) { |
321 | | - return ov::op::util::is_constant(node); |
322 | | - }); |
323 | | - OPENVINO_ASSERT(it2 != ops2.end()); |
324 | | - auto constant2 = ov::as_type_ptr<ov::op::v0::Constant>(*it2); |
325 | | - |
326 | | - // Compare results |
327 | | - auto data1 = reinterpret_cast<const uint8_t*>(constant1->get_data_ptr()); |
328 | | - auto data2 = reinterpret_cast<const uint8_t*>(constant2->get_data_ptr()); |
329 | | - size_t byte_size = constant1->get_byte_size(); |
330 | | - |
331 | | - bool mismatch = false; |
332 | | - for (size_t i = 0; i < byte_size && i < 16; ++i) { |
333 | | - if (data1[i] != data2[i]) { |
334 | | - mismatch = true; |
335 | | - break; |
336 | | - } |
337 | | - } |
338 | | - |
339 | | - if (mismatch) { |
340 | | - std::cerr << "[4-bit Nibble Order Debug] Mismatch detected!" << std::endl; |
341 | | - std::cerr << " Original type: " << original_dtype << " -> Target type: " << curr_dtype << std::endl; |
342 | | - std::cerr << " Shape: " << shape << std::endl; |
343 | | - std::cerr << " First 16 bytes comparison:" << std::endl; |
344 | | - std::cerr << " ConvertPrecision: "; |
345 | | - for (size_t i = 0; i < std::min(byte_size, size_t(16)); ++i) { |
346 | | - std::cerr << std::hex << std::setw(2) << std::setfill('0') << (int)data1[i] << " "; |
347 | | - } |
348 | | - std::cerr << std::endl; |
349 | | - std::cerr << " Convert+ConstFolding: "; |
350 | | - for (size_t i = 0; i < std::min(byte_size, size_t(16)); ++i) { |
351 | | - std::cerr << std::hex << std::setw(2) << std::setfill('0') << (int)data2[i] << " "; |
352 | | - } |
353 | | - std::cerr << std::endl; |
354 | | - |
355 | | - // Use ConvertPrecision result for now as it might be the correct one |
356 | | - transformed_constant = constant1; |
357 | | - } else { |
358 | | - // No mismatch, use either result |
359 | | - transformed_constant = constant2; |
360 | | - } |
361 | | - } else { |
362 | | - // Original code for non-4bit conversions |
363 | | - ov::ParameterVector inputParams; |
364 | | - ov::ResultVector results; |
365 | | - ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); |
366 | | - std::shared_ptr<ov::Model> model = nullptr; |
367 | | - |
368 | | - auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype); |
369 | | - results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0))); |
370 | | - model = std::make_shared<ov::Model>(results, inputParams, "aux"); |
371 | | - manager.register_pass<ov::pass::ConstantFolding>(); |
372 | | - |
373 | | - manager.run_passes(model); |
374 | | - const auto& ops = model->get_ops(); |
375 | | - auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<ov::Node>& node) { |
376 | | - return ov::op::util::is_constant(node); |
377 | | - }); |
378 | | - OPENVINO_ASSERT(it != ops.end()); |
379 | | - transformed_constant = ov::as_type_ptr<ov::op::v0::Constant>(*it); |
380 | | - } |
381 | | - |
| 281 | + ov::ParameterVector inputParams; |
| 282 | + ov::ResultVector results; |
| 283 | + ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); |
| 284 | + std::shared_ptr<ov::Model> model = nullptr; |
| 285 | + |
| 286 | + auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype); |
| 287 | + results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0))); |
| 288 | + model = std::make_shared<ov::Model>(results, inputParams, "aux"); |
| 289 | + manager.register_pass<ov::pass::ConstantFolding>(); |
| 290 | + |
| 291 | + manager.run_passes(model); |
| 292 | + const auto& ops = model->get_ops(); |
| 293 | + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<ov::Node>& node) { |
| 294 | + return ov::op::util::is_constant(node); |
| 295 | + }); |
| 296 | + OPENVINO_ASSERT(it != ops.end()); |
| 297 | + transformed_constant = ov::as_type_ptr<ov::op::v0::Constant>(*it); |
382 | 298 | OPENVINO_ASSERT(transformed_constant->get_element_type() == curr_dtype); |
383 | 299 | } |
384 | 300 |
|
|
0 commit comments