Skip to content

Commit

Permalink
Allow Meta Llama models on Bedrock.
Browse files Browse the repository at this point in the history
  • Loading branch information
boranhan committed Nov 10, 2024
1 parent e0a908a commit 1f35f8d
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 46 deletions.
12 changes: 2 additions & 10 deletions src/autogluon_assistant/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,7 @@ def preprocess_task(self, task: TabularPredictionTask) -> TabularPredictionTask:
task = self.inference_task(task)
if self.feature_transformers_config:
logger.info("Automatic feature generation starts...")
if "OPENAI_API_KEY" not in os.environ:
logger.info("No OpenAI API keys found, therefore, skip CAAFE")
fe_transformers = [
instantiate(ft_config)
for ft_config in self.feature_transformers_config
if ft_config["_target_"] != "autogluon_assistant.transformer.CAAFETransformer"
]
else:
fe_transformers = [instantiate(ft_config) for ft_config in self.feature_transformers_config]
fe_transformers = [instantiate(ft_config) for ft_config in self.feature_transformers_config]
for fe_transformer in fe_transformers:
try:
with timeout(
Expand All @@ -122,7 +114,7 @@ def preprocess_task(self, task: TabularPredictionTask) -> TabularPredictionTask:
self.handle_exception(f"Task preprocessing: {fe_transformer.name}", e)
logger.info("Automatic feature generation complete!")
else:
logger.info("Automatic feature generation is disabled.")
logger.info("Automatic feature generation is disabled. ")
return task

def fit_predictor(self, task: TabularPredictionTask):
Expand Down
2 changes: 1 addition & 1 deletion src/autogluon_assistant/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def get_bedrock_models() -> List[str]:
return [
model["modelId"]
for model in response["modelSummaries"]
if model["modelId"].startswith("anthropic.claude")
if model["modelId"].startswith("anthropic.claude") or model["modelId"].startswith("meta.llama")
]
except Exception as e:
print(f"Error fetching Bedrock models: {e}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,48 +29,46 @@ def get_device_info():
return DeviceInfo(cpu_count, gpu_devices)


def huggingface_run(model, data):
    """Encode a column of free text with a sentence-transformer model.

    A column counts as free text only when every entry is a string and at
    least one entry has more than 10 whitespace-separated tokens; anything
    else gets a zero per row so the caller can discard the column.

    Args:
        model: object exposing ``encode(sequence) -> array`` (e.g. a
            SentenceTransformer instance).
        data: 1-D sequence of column values.

    Returns:
        ``float32`` embedding matrix for text columns, otherwise
        ``np.zeros(len(data))``.
    """
    looks_like_text = all(isinstance(entry, str) for entry in data) and any(
        len(entry.split(" ")) > 10 for entry in data
    )
    if not looks_like_text:
        return np.zeros(len(data))
    # Blank out missing values defensively before encoding.
    cleaned = np.where(pd.isna(data), "", data)
    return model.encode(cleaned).astype("float32")


def glove_run_one_proc(model, data):
    """Mean-pool GloVe word vectors for each entry of a free-text column.

    Uses the same text heuristic as ``huggingface_run``: every entry must be
    a string and at least one must have more than 10 whitespace-separated
    tokens; non-text input yields ``np.zeros(len(data))``.

    Args:
        model: object exposing ``get_mean_vector(tokens) -> vector``
            (e.g. gensim KeyedVectors).
        data: 1-D sequence of column values.

    Returns:
        ``float32`` matrix of mean vectors, or zeros for non-text columns.
    """
    looks_like_text = all(isinstance(entry, str) for entry in data) and any(
        len(entry.split(" ")) > 10 for entry in data
    )
    if not looks_like_text:
        return np.zeros(len(data))
    vectors = [model.get_mean_vector(list(tokenize(entry))) for entry in data]
    return np.stack(vectors).astype("float32")


class PretrainedEmbeddingTransformer(BaseFeatureTransformer):
def __init__(self, model_name, **kwargs) -> None:
self.model_name = model_name
if torch.cuda.is_available():
try:
self.model = SentenceTransformer(self.model_name)
except:
logger.warning(f"No model {self.model_name} is found.")
self.model_name = model_name

else:
if not torch.cuda.is_available():
logger.warning("CUDA is not found. For an optimized user experience, we switched to the glove embeddings")
self.model_name = "glove-wiki-gigaword"
self.dim = 300
self.max_num_procs = 16
self.cpu_count = int(os.environ.get("NUM_VISIBLE_CPUS", os.cpu_count()))

def _fit_dataframes(self, train_X: pd.DataFrame, train_y: pd.Series, **kwargs) -> None:
pass

def glove_run_one_proc(self, data):
embeddings = []
if all(isinstance(x, str) for x in data) and any(len(x.split(" ")) > 10 for x in data):
try:
self.model = api.load(f"{self.model_name}-{self.dim}")
except:
logger.warning(f"No model {self.model_name}-{self.dim} is found.")
self.cpu_count = int(os.environ.get("NUM_VISIBLE_CPUS", os.cpu_count()))
for text in data:
token_list = list(tokenize(text))
embed = self.model.get_mean_vector(token_list)
embeddings.append(embed)
else:
return np.zeros(len(data))
return np.stack(embeddings).astype("float32")

def _fit_dataframes(self, train_X: pd.DataFrame, train_y: pd.Series, **kwargs) -> None:
pass
def huggingface_run(self, data):
if all(isinstance(x, str) for x in data) and any(len(x.split(" ")) > 10 for x in data):
try:
self.model = SentenceTransformer(self.model_name)
except:
logger.warning(f"No model {self.model_name} is found.")
data = np.where(pd.isna(data), "", data)
return self.model.encode(data).astype("float32")
else:
return np.zeros(len(data))

def _transform_dataframes(self, train_X: pd.DataFrame, test_X: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
assert (
Expand All @@ -79,14 +77,14 @@ def _transform_dataframes(self, train_X: pd.DataFrame, test_X: pd.DataFrame) ->

for series_name in train_X.columns.values.tolist():
if torch.cuda.is_available():
transformed_train_column = huggingface_run(self.model, np.transpose(train_X[series_name].to_numpy()).T)
transformed_test_column = huggingface_run(self.model, np.transpose(test_X[series_name].to_numpy()).T)
transformed_train_column = self.huggingface_run(np.transpose(train_X[series_name].to_numpy()).T)
transformed_test_column = self.huggingface_run(np.transpose(test_X[series_name].to_numpy()).T)
else:
transformed_train_column = glove_run_one_proc(
self.model, np.transpose(train_X[series_name].to_numpy()).T
transformed_train_column = self.glove_run_one_proc(
np.transpose(train_X[series_name].to_numpy()).T
)
transformed_test_column = glove_run_one_proc(
self.model, np.transpose(test_X[series_name].to_numpy()).T
transformed_test_column = self.glove_run_one_proc(
np.transpose(test_X[series_name].to_numpy()).T
)

if transformed_train_column.any() and transformed_test_column.any():
Expand Down

0 comments on commit 1f35f8d

Please sign in to comment.