Skip to content

Commit

Permalink
arabesque dataset initial version added
Browse files Browse the repository at this point in the history
  • Loading branch information
saeid93 committed Jun 19, 2022
1 parent 93dd615 commit da8e5c0
Show file tree
Hide file tree
Showing 19 changed files with 329 additions and 98 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -240,5 +240,6 @@ data/train-results
data/plots
data/clusters
data.zip
data/arabesque

reward_scaling_samples_1
reward_scaling_samples_1
18 changes: 12 additions & 6 deletions data/configs/check/check_env.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@
"penalty_v": 1,
"penalty_g": 1,
"penalty_p": 1,
"reward_var_illegal": 1.05,
"reward_var_u": 1.05,
"reward_var_c": 1.05,
"reward_var_v": 1.05,
"reward_var_g": 1.05,
"reward_var_p": 1.05,
"reward_var_illegal_1": 1,
"reward_var_u_1": 1,
"reward_var_c_1": 1,
"reward_var_v_1": 1,
"reward_var_g_1": 1,
"reward_var_p_1": 1,
"reward_var_illegal_2": 2,
"reward_var_u_2": 2,
"reward_var_c_2": 2,
"reward_var_v_2": 2,
"reward_var_g_2": 2,
"reward_var_p_2": 2,
"reward_option": "proposed",
"no_action_on_overloaded": true,
"episode_length": 10,
Expand Down
8 changes: 8 additions & 0 deletions data/configs/generation-configs/workload_alibaba.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"workload_type": "arabesque",
"notes": "arabesque dataset usage",
"cluster_id": 1,
"num_workloads":50,
"plot_smoothing":301,
"seed":42
}
8 changes: 8 additions & 0 deletions data/configs/generation-configs/workload_arabesque.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"workload_type": "arabesque",
"notes": "arabesque dataset usage",
"cluster_id": 1,
"num_services":50,
"plot_smoothing":301,
"seed":42
}
10 changes: 6 additions & 4 deletions experiments/analysis/check_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,15 @@ def check_env(*, config: Dict[str, Any], type_env: str,
_ = env.reset()

reward_total = []
# users_distances = []
# episode_total_latency_reward = 0
while i < total_timesteps:
action = env.action_space.sample()
_, reward, done, info = env.step(action)
if info['scheduling_success']:
print('scheudling timesteps')
if info['scheduling_timestep']:
print('scheudling timestep')
if info['scheduling_success']:
print('scheduling successful :)')
else:
print('scheduling unsuccessful :(')
# consolidation_rewards.append(consolidation_reward)
reward_total.append(reward)
env.render()
Expand Down
72 changes: 46 additions & 26 deletions experiments/cluster_generator/generate_workload.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,36 @@
from pprint import PrettyPrinter
pp = PrettyPrinter(indent=4)

from smart_scheduler.cluster_generator import\
WorkloadGeneratorRandom
from smart_scheduler.cluster_generator import (
WorkloadGeneratorRandom,
WorkloadGeneratorArabesque,
WorkloadGeneratorAlibaba
)

# get an absolute path to the directory that contains parent files
project_dir = os.path.dirname(os.path.join(os.getcwd(), __file__))
sys.path.append(os.path.normpath(os.path.join(project_dir, '..', '..')))

from experiments.utils.constants import (
CLUSTERS_PATH,
CONFIGS_PATH
CONFIGS_PATH,
ARABESQUE_PATH,
ALIBABA_PATH
)

from experiments.utils import config_workload_generation_check

# generaing the workloads
# def generate_workload(notes: str, cluster_id: int,
# workloads_var: List[List], timesteps: int,
# services_types: int, plot_smoothing: int,
# workload_type: str, seed: int):
def generate_workload(notes: str, cluster_id: int,
workloads_var: List[List], timesteps: int,
services_types: int, plot_smoothing: int,
workload_type: str, seed: int):
workload_type: str, seed: int,
plot_smoothing: int,
**kwargs):
"""
generate a random workload
generate a workload, either random or from a dataset
"""
# cluster path for the cluster
cluster_path = os.path.join(CLUSTERS_PATH, str(cluster_id))
Expand All @@ -57,33 +66,41 @@ def generate_workload(notes: str, cluster_id: int,
dir2save = os.path.join(workload_path, str(new_workload))
os.mkdir(dir2save)

# generate the workload
workload_generator = WorkloadGeneratorRandom(
cluster=cluster,
workloads_var=workloads_var,
timesteps=timesteps,
num_services_types=services_types,
start_workloads=start_workloads,
plot_smoothing=plot_smoothing,
seed=seed)
workloads, figs = workload_generator.make_workloads()

# information of the generated workload
if workload_type == 'random':
# generate the workload
workload_generator = WorkloadGeneratorRandom(
cluster=cluster,
workloads_var=kwargs['workloads_var'],
timesteps=kwargs['timesteps'],
num_services_types=kwargs['services_types'],
start_workloads=start_workloads,
plot_smoothing=plot_smoothing,
seed=seed)
workloads, figs = workload_generator.make_workloads()
# information of the generated workload
elif workload_type == 'arabesque':
# generate the workload
workload_generator = WorkloadGeneratorArabesque(
cluster=cluster,
dataset_path=ARABESQUE_PATH,
num_services=kwargs['num_services'],
plot_smoothing=plot_smoothing,
seed=seed)
workloads, figs = workload_generator.make_workloads()
elif workload_type == 'alibaba':
b = 1
info = {
'notes': notes,
'dataest_id': cluster_id,
'timesteps': timesteps,
'services_types': services_types,
'workload_var': workloads_var,
'plot_smoothing': plot_smoothing,
'workload_type': workload_type,
'seed': seed
}

workloads_save = {
'workload_type': workload_type,
'workloads': workloads
}

# save the information and workload in the folder
with open(os.path.join(dir2save, 'info.json'), 'x') as out_file:
json.dump(info, out_file, indent=4)
Expand All @@ -97,13 +114,16 @@ def generate_workload(notes: str, cluster_id: int,
for i, fig in enumerate(figs):
fig.savefig(os.path.join(figures_dir, f'services_type_{i}.png'))


def main():
@click.command()
@click.option('--workload-type',
type=click.Choice(
['random', 'arabesque', 'alibaba']), default='random')
def main(workload_type: str):
# read the config file
config_file_path = os.path.join(
CONFIGS_PATH,
'generation-configs',
'workload.json')
f'workload_{workload_type}.json')
with open(config_file_path) as cf:
config = json.loads(cf.read())
config_workload_generation_check(config=config)
Expand Down
2 changes: 1 addition & 1 deletion experiments/training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def learner(*, local_mode: bool,
@click.command()
@click.option('--local-mode', type=bool, default=True)
@click.option('--config-file', type=str, default='PPO-debug')
@click.option('--series', required=True, type=int, default=71)
@click.option('--series', required=True, type=int, default=1)
@click.option('--type-env', required=True,
type=click.Choice(['sim-scheduler', 'sim-binpacking',
'CartPole-v0', 'Pendulum-v0']),
Expand Down
24 changes: 0 additions & 24 deletions experiments/utils/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,30 +152,6 @@ def on_episode_end(self, *, worker: RolloutWorker,
episode.custom_metrics['reward_g'] = episode_reward_g
episode.custom_metrics['reward_p'] = episode_reward_p

# add histogram data
# episode.hist_data["scheduling_timestep_avg"] = \
# episode.user_data["scheduling_timestep_avg"]
# episode.hist_data["scheduling_success_avg"] = \
# episode.user_data["scheduling_success_avg"]
# episode.hist_data["num_consolidated_avg"] = \
# episode.user_data["num_consolidated_avg"]
# episode.hist_data["num_overloaded_avg"] = \
# episode.user_data["num_overloaded_avg"]
# episode.hist_data["time"] = \
# episode.user_data["time"]
# episode.hist_data["timesetp_episode"] = \
# episode.user_data["timesetp_episode"]
# episode.hist_data["reward_u"] = \
# episode.user_data["reward_u"]
# episode.hist_data["reward_c"] = \
# episode.user_data["reward_c"]
# episode.hist_data["reward_v"] = \
# episode.user_data["reward_v"]
# episode.hist_data["reward_g"] = \
# episode.user_data["reward_g"]
# episode.hist_data["reward_p"] = \
# episode.user_data["reward_p"]

def on_postprocess_trajectory(
self, *, worker: "RolloutWorker", episode: MultiAgentEpisode,
agent_id: AgentID, policy_id,
Expand Down
15 changes: 10 additions & 5 deletions experiments/utils/check_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def config_cluster_generation_check(config: Dict[str, Any]):

def config_workload_generation_check(config: Dict[str, Any]):
allowed_items = ['notes', 'cluster_id', 'timesteps', 'services_types',
'workloads_var', 'plot_smoothing', 'seed', 'workload_type']
'workloads_var', 'plot_smoothing', 'seed', 'workload_type',
'num_workloads', 'num_services']
for key, _ in config.items():
assert key in allowed_items, (f"<{key}> is not an allowed items for"
" the workload generation config")
Expand All @@ -58,8 +59,10 @@ def env_config_base_check(config: Dict[str, Any]):
# check the items
allowed_items = ['obs_elements', 'penalty_illegal', 'penalty_u',
'penalty_c', 'penalty_v', 'penalty_g', 'penalty_p',
'reward_var_illegal', 'reward_var_u', 'reward_var_c',
'reward_var_v', 'reward_var_g', 'reward_var_p',
'reward_var_illegal_1', 'reward_var_u_1',
'reward_var_c_1', 'reward_var_v_1', 'reward_var_g_1', 'reward_var_p_1',
'reward_var_illegal_2', 'reward_var_u_2',
'reward_var_c_2', 'reward_var_v_2', 'reward_var_g_2', 'reward_var_p_2',
'mitigation_tries', 'episode_length', 'placement_reset',
'compute_greedy_num_consolidated', 'seed', 'cluster',
'workload', 'nodes_cap_rng', 'services_request_rng',
Expand All @@ -79,8 +82,10 @@ def env_config_base_check(config: Dict[str, Any]):
for item in ints:
assert type(config[item]) == int, f"<{item}> must be an integer"
floats = ['penalty_illegal', 'penalty_u', 'penalty_c', 'penalty_v',
'penalty_g', 'penalty_p', 'reward_var_illegal', 'reward_var_u',
'reward_var_c', 'reward_var_v', 'reward_var_g', 'reward_var_p']
'penalty_g', 'penalty_p', 'reward_var_illegal_1', 'reward_var_u_1',
'reward_var_c_1', 'reward_var_v_1', 'reward_var_g_1', 'reward_var_p_1',
'reward_var_illegal_2', 'reward_var_u_2',
'reward_var_c_2', 'reward_var_v_2', 'reward_var_g_2', 'reward_var_p_2']
for item in floats:
assert type(config[item])==float or type(config[item])==int,\
f"[{item}] must be a float"
Expand Down
5 changes: 2 additions & 3 deletions experiments/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
CONFIGS_PATH = os.path.join(DATA_PATH, "configs")
BACKUP_PATH = os.path.join(DATA_PATH, "backup")
PLOTS_PATH = os.path.join(DATA_PATH, "plots")
CLUSTERS_METADATA_PATH = os.path.join(DATA_PATH, "cluster_metadata")
ARABESQUE_PATH = os.path.join(DATA_PATH, "arabesque")
ALIBABA_PATH = os.path.join(DATA_PATH, "alibaba")

def _create_dirs():
"""
Expand All @@ -32,8 +33,6 @@ def _create_dirs():
os.makedirs(BACKUP_PATH)
if not os.path.exists(TESTS_RESULTS_PATH):
os.makedirs(TESTS_RESULTS_PATH)
if not os.path.exists(CLUSTERS_METADATA_PATH):
os.makedirs(CLUSTERS_METADATA_PATH)
if not os.path.exists(PLOTS_PATH):
os.makedirs(PLOTS_PATH)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .cluster_generator_random import ClusterGeneratorRandom
from .workload_generator_random import WorkloadGeneratorRandom
from .workload_generator_arabesque import WorkloadGeneratorArabesque
from .workload_generator_alibaba import WorkloadGeneratorAlibaba
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# TODO in case of integration with the Alibaba workload at
# some point


import numpy as np
import random
from tqdm import tqdm
from smart_scheduler.util import (
plot_workload,
rounding)
import random
import string
from typing import List


class WorkloadGeneratorAlibaba:
    """Placeholder workload generator for the Alibaba cluster trace.

    Skeleton only: all trace-driven generation and plotting logic below is
    still commented out pending dataset integration (see the module-level
    TODO). The constructor mirrors the signatures of the sibling workload
    generators so callers can stay uniform across workload types.
    """

    def __init__(self, cluster, num_services, plot_smoothing, seed):
        """
        cluster generator

        Stores plotting/seed settings and seeds the RNGs.

        cluster, num_services: accepted for interface parity but not yet
            used (the related assignments remain commented out below).
        plot_smoothing: smoothing window to use when plotting workloads.
        seed: seeds both numpy and the stdlib random module so any future
            sampling is reproducible.
        """
        # self.cluster = cluster
        self.seed = seed
        # seed both RNG sources up front for reproducibility
        np.random.seed(self.seed)
        random.seed(seed)
        # self.services_types: np.array = self.cluster['services_types']

        self.plot_smoothing = plot_smoothing

    def make_workloads(self):
        """
        making random workload per each service type
        each channel (third dimension) is a different workload for
        a different machine
        generating workloads
                      timesteps
                     |         |
            |        |         |        |
        ram |        |         |        |
        cpu |        |         | types of services
        start workload:
            different types
        ram |   |
        cpu |   |

        NOTE(review): not implemented yet — as written this method only
        creates an empty figure list and implicitly returns None. The
        commented code suggests the intended result is a
        (num_resources, timesteps, num_services_types) array clipped to
        [0, 1] plus a list of figures; confirm once the dataset is wired
        in and whether lines past this hunk add an active return.
        """

        # option 1: with variations in the workload resource usage
        # workloads = np.zeros((self.num_resources,
        #                       self.timesteps,
        #                       self.num_services_types))
        # workloads[:, 0] = self.start_workloads

        # for col in tqdm(range(1, self.timesteps)):
        #     num_steps = np.random.randint(-self.workloads_max_steps,
        #                                   self.workloads_max_steps+1)
        #     steps = num_steps * self.workloads_steps_units
        #     workloads[:, col] = workloads[:, col-1] + steps
        #     workloads[workloads < 0] = 0
        #     workloads[workloads > 1] = 1
        #     workloads = np.round(workloads, 2)

        figs = []
        # TODO use once done
        # for i in range(self.num_services_types):
        #     workload_i = workloads[:, :, i]
        #     fig = plot_workload(self.timesteps, workload_i,
        #                         self.plot_smoothing, i)
        #     figs.append(fig)
        # return workloads, figs

Loading

0 comments on commit da8e5c0

Please sign in to comment.