Skip to content

Commit

Permalink
sync with upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
saeid93 committed May 11, 2023
2 parents 0e7076f + 62114a1 commit 6f838a1
Show file tree
Hide file tree
Showing 112 changed files with 1,067 additions and 757 deletions.
80 changes: 48 additions & 32 deletions Dockerfiles/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import logging
import socket
import os
import time
import signal
import threading

logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
datefmt='%Y-%m-%d:%H:%M:%S',
Expand All @@ -16,6 +19,16 @@
app = Flask(__name__)


# Lifetime of the service, read from the required DURATION environment
# variable and later passed as the interval (seconds) to threading.Timer.
# A missing variable raises KeyError and a non-integer value raises
# ValueError when this line runs.
DURATION = int(os.environ['DURATION'])
logging.info(f'DURATION set to: {DURATION}')



def stop_server():
    """Announce shutdown on stdout, then send SIGTERM to this process."""
    print('Stopping Flask app...')
    # Signal our own PID so the process goes through its normal SIGTERM path.
    own_pid = os.getpid()
    os.kill(own_pid, signal.SIGTERM)

@app.route('/', methods=['GET'])
def index():

Expand All @@ -28,7 +41,7 @@ def index():

try:

logging.info("received new configuration from utilization-server: {}".format(
logging.info("received new configuration from sdghafoLIFETIMEuri/utilization-server-smart-scheduler: {}".format(
request.form
))

Expand All @@ -50,34 +63,37 @@ def index():
# get hostname of current machine
hostname = socket.gethostname()
command = None

logging.info('trying to connect to the utilization-server')
while True:
try:
# register into controller and setup the stress
controller = requests.get('http://utilization-server.consolidation.svc/metrics/{}/'.format(hostname))

if controller.status_code == 404:
logging.info(controller.content)
time.sleep(1)
continue

content = controller.json()
logging.info("got resources: {}".format(content))

# running stress
command = COMMAND_TEMPLATE.format(
content.get('ram'),
content.get('cpu'),
)
break

except Exception as e:
logging.error(e)
exit(-1)

logging.info('running stress-ng: "{}"'.format(command))
os.system(command)

logging.info("serving 'app' on port {}".format(PORT))
app.run(host="0.0.0.0", port=PORT, debug=True, use_reloader=False)
logging.info('starting service ...')
# logging.info('trying to connect to the sdghafouri/utilization-server-smart-scheduler')
# while True:
# try:
# # register into controller and setup the stress
# controller = requests.get('http://sdghafouri/utilization-server-smart-scheduler.consolidation.svc/metrics/{}/'.format(hostname))

# if controller.status_code == 404:
# logging.info(controller.content)
# time.sleep(1)
# continue

# content = controller.json()
# logging.info("got resources: {}".format(content))

# # running stress
# command = COMMAND_TEMPLATE.format(
# content.get('ram'),
# content.get('cpu'),
# )
# break

# except Exception as e:
# logging.error(e)
# exit(-1)

# logging.info('running stress-ng: "{}"'.format(command))
# os.system(command)

timer = threading.Timer(DURATION, stop_server)
timer.start()

# logging.info("serving 'app' on port {}".format(PORT))
# app.run(host="0.0.0.0", port=PORT, debug=True, use_reloader=False)
1 change: 1 addition & 0 deletions Dockerfiles/service/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker build --tag=service:latest . && docker tag service:latest sdghafouri/service-smart-scheduler && docker push sdghafouri/service-smart-scheduler
38 changes: 19 additions & 19 deletions Dockerfiles/utilization-server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,25 +166,25 @@ def runScheduler():

if __name__ == '__main__':

while True:
try:

with open(WORKLOAD_PATH, 'rb') as file:
WORKLOADS = WorkLoads(pickle.load(file))

with open(CLUSTER_PATH, 'rb') as file:
CLUSTER = Cluster(pickle.load(file))

break
except Exception as e:
logging.info(
'looking for file "{}" and "{}", '
'in order to run web server, you need to upload them: {}'.format(
WORKLOAD_PATH,
CLUSTER_PATH,
e
))
time.sleep(1)
# while True:
# try:

# with open(WORKLOAD_PATH, 'rb') as file:
# WORKLOADS = WorkLoads(pickle.load(file))

# with open(CLUSTER_PATH, 'rb') as file:
# CLUSTER = Cluster(pickle.load(file))

# break
# except Exception as e:
# logging.info(
# 'looking for file "{}" and "{}", '
# 'in order to run web server, you need to upload them: {}'.format(
# WORKLOAD_PATH,
# CLUSTER_PATH,
# e
# ))
# time.sleep(1)

logging.info("serving 'app' on port {}".format(PORT))
app.run(host="0.0.0.0", port=PORT, debug=True, use_reloader=False)
2 changes: 1 addition & 1 deletion Dockerfiles/utilization-server/build.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
docker build --tag=utilization-server:latest . && docker tag utilization-server:latest sdghafouri/utilization-server && docker push sdghafouri/utilization-server
docker build --tag=sdghafouri/utilization-server-smart-scheduler:latest . && docker tag sdghafouri/utilization-server-smart-scheduler:latest sdghafouri/utilization-server-smart-scheduler && docker push sdghafouri/utilization-server-smart-scheduler
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# 1. Introduction
## 1.1. Repo contents
This repo contains the code of the upcoming VPA-aware scheduling paper.
## Abstract
One of the most challenging problems in the popular orchestration framework Kubernetes is assigning sufficient resources to containers to operate at a required level while also avoiding excessive resource allocation which can delay other jobs in the cluster. A variety of heuristic approaches have been proposed to tackle this problem but these require considerable manual adjustments which can be laborious. Reinforcement learning approaches have been proposed to address this issue but these proposals do not consider the energy consumption of the cluster. This is an important component of the problem due to the commitments of large cloud operators to carbon neutrality. We have proposed a system called Smart-Kube to achieve a target utilization on nodes while maintaining energy consumption at a reasonable level. An experimental framework is designed on top of real-world Kubernetes clusters and real-world traces of container jobs are used to evaluate the framework. Experimental results show that Smart-Kube can approach the target utilization and reduce energy consumption in a variety of ways depending on the preferences of the cluster operator for a variety of cluster sizes.

## Setup the environment in your machine
1. Download source code from GitHub
Expand Down
4 changes: 2 additions & 2 deletions data/configs/check/check_env.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_17.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_18.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_19.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_20.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_21.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
4 changes: 2 additions & 2 deletions data/configs/check/check_env_22.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
},
"kube":{
"admin_config": "~/.kube/config",
"service_image": "sdghafouri/service",
"service_image": "sdghafouri/service-smart-scheduler",
"namespace": "consolidation",
"clean_after_exit": true,
"utilization_image": "sdghafouri/utilization-server"
"utilization_image": "sdghafouri/utilization-server-smart-scheduler"
}
}
}
Loading

0 comments on commit 6f838a1

Please sign in to comment.