Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions scripts/aws/config-server/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Flask==2.3.2
Werkzeug==3.0.3
setuptools==70.0.0
requests==2.32.3
boto3==1.35.59
urllib3==2.2.3
179 changes: 179 additions & 0 deletions scripts/aws/ec2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env python3

import boto3
import json
import os
import subprocess
import re
import multiprocessing
import requests
import signal
import argparse
from botocore.exceptions import ClientError
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from confidential_compute import ConfidentialCompute

class EC2(ConfidentialCompute):
    """Runs the UID2 operator inside an AWS Nitro Enclave on an EC2 host.

    Orchestrates the host-side helpers (vsock proxy, config server, socks
    proxy) and the enclave lifecycle (``run_compute`` / ``cleanup``).
    """

    def __init__(self):
        super().__init__()
        # Operator configuration fetched from Secrets Manager; populated by
        # _setup_auxilaries().  Fixed: the rest of the class reads
        # ``self.configs`` (plural) -- the original assigned ``self.config``,
        # leaving ``self.configs`` undefined until _setup_auxilaries ran.
        self.configs = {}
def __get_aws_token(self):
try:
token_url = "http://169.254.169.254/latest/api/token"
token_response = requests.put(token_url, headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"}, timeout=2)
return token_response.text
except Exception as e:
return "blank"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not let the exception propagate through? Why is it valid to swallow the error here?


def __get_current_region(self):
token = self.__get_aws_token()
metadata_url = "http://169.254.169.254/latest/dynamic/instance-identity/document"
headers = {"X-aws-ec2-metadata-token": token}
try:
response = requests.get(metadata_url, headers=headers,timeout=2)
if response.status_code == 200:
return response.json().get("region")
else:
print(f"Failed to fetch region, status code: {response.status_code}")
except Exception as e:
raise Exception(f"Region not found, are you running in EC2 environment. {e}")

def _get_secret(self, secret_identifier):
client = boto3.client("secretsmanager", region_name=self.__get_current_region())
try:
secret = client.get_secret_value(SecretId=secret_identifier)
return json.loads(secret["SecretString"])
except ClientError as e:
raise Exception("Unable to access secret store")

def __add_defaults(self, configs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be a static method?

configs.setdefault("enclave_memory_mb", 24576)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are these default coming from? Why are they reasonable?

configs.setdefault("enclave_cpu_count", 6)
configs.setdefault("debug_mode", False)
return configs

def __setup_vsockproxy(self, log_level):
thread_count = int((multiprocessing.cpu_count() + 1) // 2)
log_level = log_level
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant line?

try:
subprocess.Popen(["/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml", "--workers", str(thread_count), "--log-level", log_level, "--daemon"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This launches process in background without waiting for command to finish. If proxy fails to daemonise for some reason, you may not know.

print("VSOCK proxy is now running in the background")
except FileNotFoundError:
print("Error: vsockpx not found. Please ensure the path is correct")
except Exception as e:
print("Failed to start VSOCK proxy")

def __run_config_server(self, log_level):
os.makedirs("/etc/secret/secret-value", exist_ok=True)
with open('/etc/secret/secret-value/config', 'w') as fp:
json.dump(self.configs, fp)
os.chdir("/opt/uid2operator/config-server")
# TODO: Add --log-level to flask.
try:
subprocess.Popen(["./bin/flask", "run", "--host", "127.0.0.1", "--port", "27015"])
print("Config server is now running in the background.")
except Exception as e:
print(f"Failed to start config server: {e}")

    def __run_socks_proxy(self, log_level):
        # Launch the socks daemon in the background.  ``log_level`` is
        # currently unused here (sockd reads its own configuration).
        # NOTE(review): unlike the other launchers, failure is not caught
        # or reported -- confirm this is intentional.
        subprocess.Popen(["sockd", "-d"])

def __get_secret_name_from_userdata(self):
token = self.__get_aws_token()
user_data_url = "http://169.254.169.254/latest/user-data"
user_data_response = requests.get(user_data_url, headers={"X-aws-ec2-metadata-token": token})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if this fails?

user_data = user_data_response.text
identity_scope = open("/opt/uid2operator/identity_scope.txt").read().strip()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to close the file?

default_name = "{}-operator-config-key".format(identity_scope.lower())
hardcoded_value = "{}_CONFIG_SECRET_KEY".format(identity_scope.upper())
match = re.search(rf'^export {hardcoded_value}="(.+?)"$', user_data, re.MULTILINE)
return match.group(1) if match else default_name

def _setup_auxilaries(self):
hostname = os.getenv("HOSTNAME", default=os.uname()[1])
file_path = "HOSTNAME"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this necessary?

try:
with open(file_path, "w") as file:
file.write(hostname)
print(f"Hostname '{hostname}' written to {file_path}")
except Exception as e:
print(f"An error occurred : {e}")
config = self._get_secret(self.__get_secret_name_from_userdata())
self.configs = self.__add_defaults(config)
log_level = 3 if self.configs['debug_mode'] else 1
self.__setup_vsockproxy(log_level)
self.__run_config_server(log_level)
self.__run_socks_proxy(log_level)


def _validate_auxilaries(self):
proxy = "socks5h://127.0.0.1:3305"
url = "http://127.0.0.1:27015/getConfig"
response = requests.get(url)
if response.status_code != 200:
raise Exception("Config server unreachable")
proxies = {
"http": proxy,
"https": proxy,
}
try:
response = requests.get(url, proxies=proxies)
response.raise_for_status()
except Exception as e:
raise Exception(f"Cannot conect to config server through socks5: {e}")

def run_compute(self):
self._setup_auxilaries()
self._validate_auxilaries()
command = [
"nitro-cli", "run-enclave",
"--eif-path", "/opt/uid2operator/uid2operator.eif",
"--memory", self.config['enclave_memory_mb'],
"--cpu-count", self.config['enclave_cpu_count'],
"--enclave-cid", 42,
"--enclave-name", "uid2operator"
]
if self.config['debug']:
command+=["--debug-mode", "--attach-console"]
subprocess.run(command, check=True)

def cleanup(self):
describe_output = subprocess.check_output(["nitro-cli", "describe-enclaves"], text=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not terminate enclave by name? Or terminate all enclaves?

enclaves = json.loads(describe_output)
enclave_id = enclaves[0].get("EnclaveID") if enclaves else None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This just kills the first enclave from the list without consideration to whether it is the right one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we only run 1 enclave on a host, right?

can add validation if needed

if enclave_id:
subprocess.run(["nitro-cli", "terminate-enclave", "--enclave-id", enclave_id])
print(f"Enclave with ID {enclave_id} has been terminated.")
else:
print("No enclave found or EnclaveID is null.")

def kill_process(self, process_name):
try:
result = subprocess.run(
["pgrep", "-f", process_name],
stdout=subprocess.PIPE,
text=True,
check=False
)
if result.stdout.strip():
for pid in result.stdout.strip().split("\n"):
os.kill(int(pid), signal.SIGKILL)
print(f"{process_name} exited")
else:
print(f"Process {process_name} not found")
except Exception as e:
print(f"Failed to shut down {process_name}: {e}")

if __name__ == "__main__":
    # Entry point: `ec2.py` starts the operator; `ec2.py -o stop` tears it down.
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--operation", required=False)
    args = parser.parse_args()
    ec2 = EC2()
    if args.operation == "stop":
        ec2.cleanup()
        # Fixed: use a plain loop (not a list comprehension) for side
        # effects, and drop "nohup" from the kill list -- pgrep -f "nohup"
        # can match completely unrelated processes (flagged in review).
        # Both "vsockpx" and "vsock-proxy" are kept until it is confirmed
        # which binary name is actually in use.
        for process in ["vsockpx", "sockd", "vsock-proxy"]:
            ec2.kill_process(process)
    else:
        ec2.run_compute()
1 change: 1 addition & 0 deletions scripts/aws/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ else
exit 1
fi

# TODO(review): Do we need this? Do we expect customers to change the URL?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've never heard of it being done before.

# -- replace base URLs if both CORE_BASE_URL and OPTOUT_BASE_URL are provided
# -- using hardcoded domains is fine because they should not be changed frequently
if [ -n "${CORE_BASE_URL}" ] && [ "${CORE_BASE_URL}" != "null" ] && [ -n "${OPTOUT_BASE_URL}" ] && [ "${OPTOUT_BASE_URL}" != "null" ] && [ "${DEPLOYMENT_ENVIRONMENT}" != "prod" ]; then
Expand Down
124 changes: 0 additions & 124 deletions scripts/aws/start.sh

This file was deleted.

31 changes: 0 additions & 31 deletions scripts/aws/stop.sh

This file was deleted.

17 changes: 3 additions & 14 deletions scripts/aws/uid2-operator-ami/ansible/playbook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,24 +72,13 @@

- name: Install starter script
ansible.builtin.copy:
src: /tmp/artifacts/start.sh
dest: /opt/uid2operator/start.sh
src: /tmp/artifacts/ec2.py
dest: /opt/uid2operator/ec2.py
remote_src: yes

- name: Make starter script executable
ansible.builtin.file:
path: /opt/uid2operator/start.sh
mode: '0755'

- name: Install stopper script
ansible.builtin.copy:
src: /tmp/artifacts/stop.sh
dest: /opt/uid2operator/stop.sh
remote_src: yes

- name: Make starter script executable
ansible.builtin.file:
path: /opt/uid2operator/stop.sh
path: /opt/uid2operator/ec2.py
mode: '0755'

- name: Install Operator EIF
Expand Down
4 changes: 2 additions & 2 deletions scripts/aws/uid2operator.service
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ RemainAfterExit=true
StandardOutput=journal
StandardError=journal
SyslogIdentifier=uid2operator
ExecStart=/opt/uid2operator/start.sh
ExecStop=/opt/uid2operator/stop.sh
ExecStart=/opt/uid2operator/ec2.py
ExecStop=/opt/uid2operator/ec2.py -o stop

[Install]
WantedBy=multi-user.target
Loading