Skip to content

Commit ac63287

Browse files
authored
feat: Update /etc/sub*id files to support nested containers (#232)
* feat: Update /etc/sub*id files in order to run podman command in a user namespace Signed-off-by: Anatolii Bazko <[email protected]>
1 parent e05d895 commit ac63287

File tree

2 files changed

+64
-1
lines changed

2 files changed

+64
-1
lines changed

base/ubi9/Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,5 +180,6 @@ RUN \
180180
USER 10001
181181
ENV HOME=/home/user
182182
WORKDIR /projects
183-
ENTRYPOINT [ "/entrypoint.sh" ]
183+
# /usr/libexec/podman/catatonit is used to reap zombie processes
184+
ENTRYPOINT ["/usr/libexec/podman/catatonit","--","/entrypoint.sh"]
184185
CMD ["tail", "-f", "/dev/null"]

base/ubi9/entrypoint.sh

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,68 @@ if ! whoami &> /dev/null; then
3333
fi
3434
fi
3535

36+
# A user namespace is created when the `UserNamespacesSupport` feature is enabled and `hostUsers` is set to false in the Pod spec.
37+
# For more details, see:
38+
# - https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/#introduction
39+
# We assume that the HOST_USERS environment variable is provided and set to false in that case.
40+
# If so, update /etc/sub*id files to reflect the valid UID/GID range.
41+
if [ "${HOST_USERS}" == "false" ]; then
42+
echo "Running in a user namespace"
43+
if [ -f /proc/self/uid_map ]; then
44+
# Typical output of `/proc/self/uid_map`:
45+
# 1. When NOT running in a user namespace:
46+
# 0 0 4294967295
47+
# 2. When running in a user namespace:
48+
# 0 1481179136 65536
49+
# 3. When a nested container is run (e.g. by podman) inside a user namespace:
50+
# 0 1000 1
51+
# 1 1001 64535
52+
# We can use the content of /proc/self/uid_map to detect if we are running in a user namespace.
53+
# However, to keep things simple, we will rely on HOST_USERS environment variable only.
54+
# This way, we avoid breaking anything.
55+
echo "/proc/self/uid_map content: $(cat /proc/self/uid_map)"
56+
fi
57+
58+
# Why do we need to update /etc/sub*id files?
59+
# We are already in the user namespace, so we know there are at least 65536 UIDs/GIDs available.
60+
# For more details, see:
61+
# - https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/#id-count-for-each-of-pods
62+
# Podman needs to create a new user namespace for any container being launched and map the container's user
63+
# and group IDs (UID/GID) to the corresponding user on the current namespace.
64+
# For the mapping, podman refers to the /etc/sub*id files.
65+
# For more details, see:
66+
# - https://man7.org/linux/man-pages/man5/subuid.5.html
67+
# So a user ID above 65535 cannot be mapped when only UIDs/GIDs 0-65535 are available.
68+
# If that's the case, podman commands would fail.
69+
70+
# Even though the range can be extended using configuration, we can rely on the fact that there are at least 65536 user IDs available in the user namespace.
71+
# For more details, see:
72+
# - https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-UserNamespaces
73+
74+
# To ensure the provided user ID stays within this range, `runAsUser` in the Pod spec should be set to a value below 65536.
75+
# In OpenShift, the Container Security Context Constraint (SCC) should be created accordingly.
76+
if whoami &> /dev/null; then
77+
echo "User information: $(id)"
78+
79+
USER_ID=$(id -u)
80+
if [ ${USER_ID} -lt 65536 ]; then
81+
USER_NAME=$(whoami)
82+
START_ID=$(( ${USER_ID} + 1 ))
83+
COUNT=$(( 65536 - ${START_ID} ))
84+
IDS_RANGE="${USER_NAME}:${START_ID}:${COUNT}"
85+
86+
if [ -w /etc/subuid ]; then
87+
echo "${IDS_RANGE}" > /etc/subuid
88+
echo "/etc/subuid updated"
89+
fi
90+
if [ -w /etc/subgid ]; then
91+
echo "${IDS_RANGE}" > /etc/subgid
92+
echo "/etc/subgid updated"
93+
fi
94+
fi
95+
fi
96+
fi
97+
3698
source kubedock_setup
3799

38100

0 commit comments

Comments
 (0)