Conversation
| ops_module = importlib.import_module(f"_{vendor_name}.ops") | ||
| return importlib.import_module(module_name) | ||
| except ModuleNotFoundError: | ||
| print( |
There was a problem hiding this comment.
Consider change this to logger.info ?
There was a problem hiding this comment.
If except is entered, this [note] need to be prompted to the user in any case.
There was a problem hiding this comment.
Even in that case, the error message are not supposed to be printed on stdout.
This module is in the core of the project, and the whole project can be scripted.
I have noticed this unconditional print when writing test scripts.
By the way, it should be fine (not good though ...) if a vendor has no operators implemented.
| if _state.device_name in ("musa", "aipu", "npu", "txda", "ptpu", "gcu"): | ||
| _state.torch_device_fn_device = None |
There was a problem hiding this comment.
I don't like this logic ... it is hard to maintain.
Please consider using a try ... except struct here...
There was a problem hiding this comment.
using a try ... except struct here doesn‘t conform to the original intention of the code. torch_device_fn_device is not necessarily non-existent. It could be that the vendor doesn't want to use it, or it might not be needed at all
There was a problem hiding this comment.
I mean 'except: pass'.
Don't enumerate the device name here ...
no one can remember that we planted some hardcoded device specific code here.
| vendor_module = get_module("_" + vendor_name) | ||
| return vendor_module | ||
| if _state.vendor_module is None: | ||
| _state.vendor_module = get_module("_" + vendor_name) |
There was a problem hiding this comment.
this get_module is exception free?
There was a problem hiding this comment.
Yes, in principle, there is no exception. If there is an exception for unknown reasons, it must be reported.
There was a problem hiding this comment.
there are in general two approaches ...
either you catch all exceptions in the get_module so the function never throws an exception, and you may get an empty string here (check it);
or you allow it to throw an exception and you will catch that exception in the call chain.
Refactor error messages for clarity and consistency. Signed-off-by: Galaxy1458 <[email protected]>
| try: | ||
| _state.heuristic_config_module = importlib.import_module(mod_name) | ||
| except Exception: | ||
| continue |
There was a problem hiding this comment.
should be 'pass'?
| continue | |
| pass |
| @@ -1,148 +1,104 @@ | |||
| import os | |||
There was a problem hiding this comment.
File mode changed from 644 to 755, pls fix.
| UNSUPPORT_FP64, | ||
| UNSUPPORT_INT64, | ||
| vendors, | ||
| ) |
There was a problem hiding this comment.
cool. This should be extracted.
| if hasattr(self, "initialized"): | ||
| return |
There was a problem hiding this comment.
nit. personally I prefer your previous implementation that make the instance always have an initialized attribute. We only test if that attribute is true or not, rather than whether the instance has a specific attribute.
| self.info = self.get_vendor(vendor_name) | ||
| self.vendor_name = self.info.vendor_name | ||
| self.name = self.info.device_name | ||
| self.vendor = vendors.get_all_vendors()[self.vendor_name] |
There was a problem hiding this comment.
always be careful when using [] ... it is disgusting that it may throw exceptions, use get(key, default) whenever possible.
| self.device_count = backend.gen_torch_device_object( | ||
| self.vendor_name | ||
| ).device_count() | ||
| self.support_fp64 = self.vendor not in UNSUPPORT_FP64 |
There was a problem hiding this comment.
sigh ... this feature should be pushed down to vendor layers ... let's leave it to a future PR
| if hasattr(torch, attr): | ||
| return vendor_name | ||
| try: | ||
| import torch_npu |
There was a problem hiding this comment.
This damn import is still here ...
| except Exception: | ||
| return None | ||
|
|
||
| with ThreadPoolExecutor() as executor: |
There was a problem hiding this comment.
:) Cannot imagine that we are using multiple threads to do this.
| vendors.CAMBRICON, | ||
| vendors.ILUVATAR, | ||
| vendors.KUNLUNXIN, | ||
| vendors.MTHREADS, | ||
| vendors.AIPU, | ||
| vendors.ASCEND, | ||
| vendors.TSINGMICRO, | ||
| vendors.SUNRISE, | ||
| vendors.ENFLAME, |
| vendors.AIPU, | ||
| vendors.TSINGMICRO, | ||
| vendors.SUNRISE, | ||
| vendors.ENFLAME, |
| "iluvatar": "corex", | ||
| "ascend": "npu", | ||
| "sunrise": "ptpu", | ||
| "enflame": "gcu", |
| self.arch_specialized_yaml_config = archEvent.autotune_configs | ||
| self.arch_heuristics_config = archEvent.heuristics_configs | ||
| except Exception as err: | ||
| print(f"[INFO] : {err}") |
There was a problem hiding this comment.
Same suggestion here. Log an error or an warning please.
| if self.device.vendor_name == "hygon": | ||
| kwargs["num_ldmatrixes"] = current_config["num_ldmatrixes"] |
There was a problem hiding this comment.
Maybe a more generic ways is something like:
| if self.device.vendor_name == "hygon": | |
| kwargs["num_ldmatrixes"] = current_config["num_ldmatrixes"] | |
| kwargs["num_ldmatrixes"] = current_config.get("num_ldmatrixes", None) |
The thing is that in this core module, we don't treat any vendor as a special beast.
| num_stages=current_config["num_stages"], | ||
| num_ctas=current_config["num_ctas"], | ||
| ) | ||
| ) |
PR Category
Type of Change
Description
Issue
Progress
Performance