Skip to content

Commit a36305b

Browse files
authored
Merge pull request #77 from arm/axion-data-addition
add axion data
2 parents 70ad0d9 + 2a610f5 commit a36305b

File tree

4 files changed

+178
-37
lines changed

4 files changed

+178
-37
lines changed

embedding-generation/eval_questions.json

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,75 @@
7676
"expected_urls": ["https://amperecomputing.com/blogs/getting-cloud-native-with-freebsd-on-oci-ampere-a1-with-terraform-"]
7777
},
7878
{
79-
"question": "In the AWS Graviton performance runbook, how should I define a benchmark and configure the system under test before optimization?",
79+
"question": "What Google Axion-backed Compute Engine machine series are available for Arm VMs, and how do C4A and N4A differ?",
8080
"expected_urls": [
81-
"https://github.com/aws/aws-graviton-getting-started/blob/main/perfrunbook/defining_your_benchmark.md",
82-
"https://github.com/aws/aws-graviton-getting-started/blob/main/perfrunbook/configuring_your_sut.md"
81+
"https://docs.cloud.google.com/compute/docs/instances/arm-on-compute"
82+
]
83+
},
84+
{
85+
"question": "On Google Cloud CPU platforms, which Arm machine series map to Google Axion versus Ampere Altra, and how are vCPUs counted on those platforms?",
86+
"expected_urls": [
87+
"https://docs.cloud.google.com/compute/docs/cpu-platforms"
88+
]
89+
},
90+
{
91+
"question": "What storage, networking, and workload positioning does Google Cloud call out for the C4A and N4A general-purpose machine families?",
92+
"expected_urls": [
93+
"https://docs.cloud.google.com/compute/docs/general-purpose-machines"
94+
]
95+
},
96+
{
97+
"question": "How does Google Cloud's next-generation dynamic resource management improve N4A VM placement and live migration for Axion workloads?",
98+
"expected_urls": [
99+
"https://docs.cloud.google.com/compute/docs/dynamic-resource-management"
100+
]
101+
},
102+
{
103+
"question": "What C4A bare metal instance options does Compute Engine provide, including the machine type, vCPU count, memory, and network bandwidth?",
104+
"expected_urls": [
105+
"https://docs.cloud.google.com/compute/docs/instances/bare-metal-instances"
106+
]
107+
},
108+
{
109+
"question": "In Google Cloud Troubleshooting Arm VMs, what happens if you create an Arm VM from a boot disk with x86 architecture, and how do serial console logs help identify it?",
110+
"expected_urls": [
111+
"https://docs.cloud.google.com/compute/docs/troubleshooting/troubleshooting-arm-vms"
112+
]
113+
},
114+
{
115+
"question": "Which GKE cluster modes and machine families support Arm workloads, and what are the major limitations for Arm nodes on GKE?",
116+
"expected_urls": [
117+
"https://docs.cloud.google.com/kubernetes-engine/docs/concepts/arm-on-gke"
118+
]
119+
},
120+
{
121+
"question": "How do you build and verify a multi-architecture container image so the same image can run on both x86 and Arm nodes in GKE?",
122+
"expected_urls": [
123+
"https://docs.cloud.google.com/kubernetes-engine/docs/how-to/build-multi-arch-for-arm"
124+
]
125+
},
126+
{
127+
"question": "In a GKE Standard cluster, why are Arm nodes tainted by default and what selectors or tolerations are needed to schedule workloads onto them?",
128+
"expected_urls": [
129+
"https://docs.cloud.google.com/kubernetes-engine/docs/how-to/prepare-arm-workloads-for-deployment"
130+
]
131+
},
132+
{
133+
"question": "How do you request Arm nodes in GKE Autopilot, and what does kubernetes.io/arch=arm64 select on newer versus older cluster versions?",
134+
"expected_urls": [
135+
"https://docs.cloud.google.com/kubernetes-engine/docs/how-to/autopilot-arm-workloads"
136+
]
137+
},
138+
{
139+
"question": "What are the main steps in the GKE tutorial for migrating an x86-only application to a multi-arch image that also runs on Arm?",
140+
"expected_urls": [
141+
"https://docs.cloud.google.com/kubernetes-engine/docs/tutorials/migrate-x86-to-multi-arch-arm"
142+
]
143+
},
144+
{
145+
"question": "In GKE Troubleshooting Arm workloads, what does the Pod log message \"exec ./hello-app: exec format error\" mean, and what is the recommended fix?",
146+
"expected_urls": [
147+
"https://docs.cloud.google.com/kubernetes-engine/docs/troubleshooting/arm-workloads"
83148
]
84149
}
85150
]

embedding-generation/generate-chunks.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,13 +163,26 @@ def register_source(site_name, license_type, display_name, url, keywords):
163163
return False
164164

165165
known_source_urls.add(url)
166-
all_sources.append({
166+
source_entry = {
167167
'site_name': site_name,
168168
'license_type': license_type,
169169
'display_name': display_name,
170170
'url': url,
171171
'keywords': keywords if isinstance(keywords, str) else '; '.join(keywords)
172-
})
172+
}
173+
174+
# Keep discovered sources grouped with their existing site section instead of
175+
# appending them to the very end of the CSV and fragmenting that block.
176+
insert_at = None
177+
for index, existing_source in enumerate(all_sources):
178+
if existing_source.get('site_name') == site_name:
179+
insert_at = index + 1
180+
181+
if insert_at is None:
182+
all_sources.append(source_entry)
183+
else:
184+
all_sources.insert(insert_at, source_entry)
185+
173186
print(f"[NEW SOURCE] {display_name}: {url}")
174187
return True
175188

embedding-generation/tests/test_generate_chunks.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,57 @@ def test_register_source_duplicate(self, gc):
155155
assert result is False
156156
assert len(gc.all_sources) == 1
157157

158+
def test_register_source_inserts_after_matching_site_group(self, gc):
159+
"""Test that new sources stay grouped with existing sources from the same site."""
160+
gc.all_sources = [
161+
{
162+
'site_name': 'Google Cloud',
163+
'license_type': 'CC4.0',
164+
'display_name': 'Google 1',
165+
'url': 'https://example.com/google-1',
166+
'keywords': 'g1'
167+
},
168+
{
169+
'site_name': 'Ecosystem Dashboard',
170+
'license_type': 'Arm Proprietary',
171+
'display_name': 'Dashboard 1',
172+
'url': 'https://example.com/dashboard-1',
173+
'keywords': 'd1'
174+
},
175+
{
176+
'site_name': 'Ecosystem Dashboard',
177+
'license_type': 'Arm Proprietary',
178+
'display_name': 'Dashboard 2',
179+
'url': 'https://example.com/dashboard-2',
180+
'keywords': 'd2'
181+
},
182+
{
183+
'site_name': 'AWS Graviton',
184+
'license_type': 'Apache-2.0',
185+
'display_name': 'Graviton 1',
186+
'url': 'https://example.com/graviton-1',
187+
'keywords': 'a1'
188+
},
189+
]
190+
gc.known_source_urls = {source['url'] for source in gc.all_sources}
191+
192+
result = gc.register_source(
193+
site_name="Ecosystem Dashboard",
194+
license_type="Arm Proprietary",
195+
display_name="Dashboard 3",
196+
url="https://example.com/dashboard-3",
197+
keywords=["d3"]
198+
)
199+
200+
assert result is True
201+
assert [source['display_name'] for source in gc.all_sources] == [
202+
'Google 1',
203+
'Dashboard 1',
204+
'Dashboard 2',
205+
'Dashboard 3',
206+
'Graviton 1',
207+
]
208+
158209
def test_register_source_url_normalization(self, gc):
159210
"""Test that URLs are stripped of whitespace."""
160211
gc.register_source(

0 commit comments

Comments
 (0)