Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cmd/ops_agent_uap_plugin/service_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const (
LogsDirectory = "log/google-cloud-ops-agent"
FluentBitStateDiectory = "state/fluent-bit"
FluentBitRuntimeDirectory = "run/google-cloud-ops-agent-fluent-bit"
OtelStateDiectory = "state/opentelemetry-collector"
OtelRuntimeDirectory = "run/google-cloud-ops-agent-opentelemetry-collector"
DefaultPluginStateDirectory = "/var/lib/google-guest-agent/agent_state/plugins/ops-agent-plugin"
)
Expand Down Expand Up @@ -209,7 +210,8 @@ func generateSubagentConfigs(ctx context.Context, runCommand RunCommandFunc, plu
"-service", "otel",
"-in", OpsAgentConfigLocationLinux,
"-out", path.Join(pluginStateDirectory, OtelRuntimeDirectory),
"-logs", path.Join(pluginStateDirectory, LogsDirectory))
"-logs", path.Join(pluginStateDirectory, LogsDirectory),
"-state", path.Join(pluginStateDirectory, OtelStateDiectory))

if output, err := runCommand(otelConfigGenerationCmd); err != nil {
return fmt.Errorf("failed to generate Otel config:\ncommand output: %s\ncommand error: %s", output, err)
Expand All @@ -220,7 +222,8 @@ func generateSubagentConfigs(ctx context.Context, runCommand RunCommandFunc, plu
"-service", "fluentbit",
"-in", OpsAgentConfigLocationLinux,
"-out", path.Join(pluginStateDirectory, FluentBitRuntimeDirectory),
"-logs", path.Join(pluginStateDirectory, LogsDirectory), "-state", path.Join(pluginStateDirectory, FluentBitStateDiectory))
"-logs", path.Join(pluginStateDirectory, LogsDirectory),
"-state", path.Join(pluginStateDirectory, FluentBitStateDiectory))

if output, err := runCommand(fluentBitConfigGenerationCmd); err != nil {
return fmt.Errorf("failed to generate Fluntbit config:\ncommand output: %s\ncommand error: %s", output, err)
Expand Down
4 changes: 4 additions & 0 deletions cmd/ops_agent_windows/main_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ func initServices() error {
if err := os.MkdirAll(fluentbitStoragePath, 0644); err != nil {
return err
}
otelStoragePath := filepath.Join(os.Getenv("PROGRAMDATA"), dataDirectory, `run\file_storage`)
if err := os.MkdirAll(otelStoragePath, 0644); err != nil {
return err
}
logDirectory := filepath.Join(os.Getenv("PROGRAMDATA"), dataDirectory, "log")
if err := os.MkdirAll(logDirectory, 0644); err != nil {
return err
Expand Down
35 changes: 27 additions & 8 deletions confgenerator/confgenerator.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,32 @@ func (uc *UnifiedConfig) getOTelLogLevel() string {
return logLevel
}

func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir string) (string, error) {
// fileStorageExtensionID returns the component ID of the file_storage
// extension. Receivers put this value in their "storage" setting so that they
// all refer to the same extension instance.
func fileStorageExtensionID() string {
	const extensionID = "file_storage"
	return extensionID
}

// fileStorageExtensionConfig builds the settings map for the file_storage
// extension.
//
// The storage directory is the "file_storage" subdirectory of stateDir, joined
// with slash-separated path semantics; an empty stateDir therefore yields the
// relative path "file_storage". The create_directory option is always set to
// true.
func fileStorageExtensionConfig(stateDir string) map[string]interface{} {
	storageDir := path.Join(stateDir, "file_storage")

	config := make(map[string]interface{}, 2)
	config["create_directory"] = true
	config["directory"] = storageDir
	return config
}

// getEnabledExtensions builds the extensions section of the generated OTel
// collector config, keyed by extension ID.
//
//   - "googleclientauth" is added, with empty settings, when the
//     "otlp_exporter" experiment is enabled in ctx.
//   - The file_storage extension is added, rooted under stateDir, when OTel
//     logging is enabled in the logging service config. It is registered under
//     fileStorageExtensionID() so the key always matches the ID that receivers
//     reference in their "storage" setting.
//
// The returned map is never nil; it is empty when neither condition holds.
func (uc *UnifiedConfig) getEnabledExtensions(ctx context.Context, stateDir string) map[string]interface{} {
	extensions := map[string]interface{}{}

	if experimentsFromContext(ctx)["otlp_exporter"] {
		extensions["googleclientauth"] = map[string]interface{}{}
	}

	// Service is a pointer that may be unset (other code defaults it to
	// &LoggingService{} before use), so guard against nil instead of panicking.
	// NOTE(review): uc.Logging itself is still dereferenced unguarded; confirm
	// that callers always populate it before generating the OTel config.
	if service := uc.Logging.Service; service != nil && service.OTelLogging {
		extensions[fileStorageExtensionID()] = fileStorageExtensionConfig(stateDir)
	}

	return extensions
}

func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir, stateDir string) (string, error) {
p := platform.FromContext(ctx)
userAgent, _ := p.UserAgent("Google-Cloud-Ops-Agent-Metrics")
metricVersionLabel, _ := p.VersionLabel("google-cloud-ops-agent-metrics")
Expand All @@ -146,17 +171,11 @@ func (uc *UnifiedConfig) GenerateOtelConfig(ctx context.Context, outDir string)
}
agentSelfMetrics.AddSelfMetricsPipelines(receiverPipelines, pipelines)

expOtlpExporter := experimentsFromContext(ctx)["otlp_exporter"]
extensions := map[string]interface{}{}
if expOtlpExporter {
extensions["googleclientauth"] = map[string]interface{}{}
}

otelConfig, err := otel.ModularConfig{
LogLevel: uc.getOTelLogLevel(),
ReceiverPipelines: receiverPipelines,
Pipelines: pipelines,
Extensions: extensions,
Extensions: uc.getEnabledExtensions(ctx, stateDir),
Exporters: map[otel.ExporterType]otel.Component{
otel.System: googleCloudExporter(userAgent, false, false),
otel.OTel: googleCloudExporter(userAgent, true, true),
Expand Down
2 changes: 1 addition & 1 deletion confgenerator/confgenerator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ func generateConfigs(pc platformConfig, testDir string) (got map[string]string,
}

// Otel configs
otelGeneratedConfig, err := mergedUc.GenerateOtelConfig(ctx, "")
otelGeneratedConfig, err := mergedUc.GenerateOtelConfig(ctx, "", "")
if err != nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion confgenerator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,7 @@ func (uc *UnifiedConfig) OTelLoggingSupported(ctx context.Context) bool {
ucLoggingCopy.Logging.Service = &LoggingService{}
}
ucLoggingCopy.Logging.Service.OTelLogging = true
_, err = ucLoggingCopy.GenerateOtelConfig(ctx, "")
_, err = ucLoggingCopy.GenerateOtelConfig(ctx, "", "")
return err == nil
}

Expand Down
2 changes: 1 addition & 1 deletion confgenerator/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (uc *UnifiedConfig) GenerateFilesFromConfig(ctx context.Context, service, l
}
}
case "otel":
otelConfig, err := uc.GenerateOtelConfig(ctx, outDir)
otelConfig, err := uc.GenerateOtelConfig(ctx, outDir, stateDir)
if err != nil {
return fmt.Errorf("can't parse configuration: %w", err)
}
Expand Down
6 changes: 5 additions & 1 deletion confgenerator/logging_receivers.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ func (r LoggingReceiverFilesMixin) Pipelines(ctx context.Context) ([]otel.Receiv
"preserve_leading_whitespaces": true,
"preserve_trailing_whitespaces": true,
}
if !r.TransformationTest {
receiver_config["storage"] = fileStorageExtensionID()
}
if i := r.WildcardRefreshInterval; i != nil {
receiver_config["poll_interval"] = i.String()
}
Expand Down Expand Up @@ -606,7 +609,7 @@ func (r LoggingReceiverWindowsEventLog) Pipelines(ctx context.Context) ([]otel.R
"start_at": "beginning",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to figure out how start_at interacts with storage but the upstream docs are unclear. I assumed there would be an option for start_at that would clearly mean "pick up from where we left off according to the stored offset, rather than starting at the beginning or end", but beginning and end are the only two options.

How can I be confident that beginning is actually going to pick up from the stored file offset on collector restart rather than the beginning of the file? Can we add an integration test for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed there would be an option for start_at that would clearly mean "pick up from where we left off according to the stored offset, rather than starting at the beginning or end", but beginning and end are the only two options.

Yeah, the start_at option is not descriptive. I also assumed "beginning + storage" meant "start at the beginning if there is no bookmark".

One option is to do a clarification doc PR in the upstream docs for this.

How can I be confident that beginning is actually going to pick up from the stored file offset on collector restart rather than the beginning of the file? Can we add an integration test for this?

Yeah, we could do an integration test for this, since transformation tests are not meant to test "restarts".

  • How detailed do you think it should be to give us confidence?
  • Should it test only the files receiver, or also systemd and windowseventlog?

We could :

  1. Send 5 logs from file.
  2. Wait 2 minutes and restart.
  3. Look for duplicate logs in the past 2 mins.

Copy link
Contributor Author

@franciscovalentecastro franciscovalentecastro Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I created the TestLogCursor integration test to verify that the cursor (bookmark) is preserved after a restart.

I ran this test locally to verify that it indeed fails with an Otel Logging version without the filestorage extension:

=== NAME  TestLogCursor/debian-cloud:debian-11/default
    agents.go:1010: Test logs: /tmp/401636222/TestLogCursor_debian-cloud:debian-11_default
    agents.go:1010: Instance Log: https://console.cloud.google.com/logs/viewer?resource=gce_instance%2Finstance_id%2F5331085291174606091&project=fcovalente-dev
=== NAME  TestLogCursor/debian-cloud:debian-11/otel_logging
    agents.go:1010: Instance Log: https://console.cloud.google.com/logs/viewer?resource=gce_instance%2Finstance_id%2F6330475123837351179&project=fcovalente-dev
    main_test.go:5829: AssertLogMissing(log="jsonPayload.message=\"line #2\""): <nil> failed: unexpectedly found data for log
    main_test.go:5829: AssertLogMissing(log="jsonPayload.message=\"line #1\""): <nil> failed: unexpectedly found data for log
--- FAIL: TestLogCursor (0.00s)
    --- FAIL: TestLogCursor/debian-cloud:debian-11 (0.00s)
        --- PASS: TestLogCursor/debian-cloud:debian-11/default (273.07s)
        --- FAIL: TestLogCursor/debian-cloud:debian-11/otel_logging (310.43s)

"poll_interval": "1s",
"ignore_channel_errors": true,
// TODO: Configure storage
"storage": fileStorageExtensionID(),
}

var p []otel.Component
Expand Down Expand Up @@ -966,6 +969,7 @@ func (r LoggingReceiverSystemd) Pipelines(ctx context.Context) ([]otel.ReceiverP
receiver_config := map[string]any{
"start_at": "beginning",
"priority": "debug",
"storage": fileStorageExtensionID(),
}

modify_fields_processors, err := LoggingProcessorModifyFields{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -818,6 +822,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
filelog/syslog:
exclude: []
include:
Expand All @@ -832,6 +837,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -870,6 +876,8 @@ receivers:
- targets:
- 0.0.0.0:20201
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_syslog:
exporters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -789,6 +793,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
filelog/syslog:
exclude: []
include:
Expand All @@ -803,6 +808,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -839,6 +845,8 @@ receivers:
- targets:
- 0.0.0.0:20201
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_syslog:
exporters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=windows;ShortName=win_platform;ShortVersion=win_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -1180,6 +1184,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -1223,16 +1228,19 @@ receivers:
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowseventlog/windows__event__log_1:
channel: Application
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowseventlog/windows__event__log_2:
channel: Security
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowsperfcounters/iis:
collection_interval: 60s
perfcounters:
Expand Down Expand Up @@ -1266,6 +1274,8 @@ receivers:
- _Total
object: SQLServer:Databases
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_windows__event__log:
exporters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=windows;ShortName=win_platform;ShortVersion=win_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -1180,6 +1184,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -1223,16 +1228,19 @@ receivers:
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowseventlog/windows__event__log_1:
channel: Application
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowseventlog/windows__event__log_2:
channel: Security
ignore_channel_errors: true
poll_interval: 1s
start_at: beginning
storage: file_storage
windowsperfcounters/iis:
collection_interval: 60s
perfcounters:
Expand Down Expand Up @@ -1266,6 +1274,8 @@ receivers:
- _Total
object: SQLServer:Databases
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_windows__event__log:
exporters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -791,6 +795,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
filelog/syslog:
exclude: []
include:
Expand All @@ -805,6 +810,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -843,6 +849,8 @@ receivers:
- targets:
- 0.0.0.0:20201
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_syslog:
exporters:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ exporters:
service_resource_labels: true
skip_create_descriptor: true
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
extensions:
file_storage:
create_directory: true
directory: file_storage
processors:
agentmetrics/hostmetrics_0:
blank_label_metrics:
Expand Down Expand Up @@ -762,6 +766,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
filelog/syslog:
exclude: []
include:
Expand All @@ -776,6 +781,7 @@ receivers:
preserve_leading_whitespaces: true
preserve_trailing_whitespaces: true
start_at: beginning
storage: file_storage
hostmetrics/hostmetrics:
collection_interval: 60s
scrapers:
Expand Down Expand Up @@ -812,6 +818,8 @@ receivers:
- targets:
- 0.0.0.0:20201
service:
extensions:
- file_storage
pipelines:
logs/logs_default__pipeline_syslog:
exporters:
Expand Down
Loading
Loading