Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 61 additions & 11 deletions internal/hcs/callback.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
package hcs

import (
"context"
"encoding/json"
"fmt"
"sync"
"syscall"

"github.com/sirupsen/logrus"

"github.com/Microsoft/hcsshim/internal/interop"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/vmcompute"
"github.com/sirupsen/logrus"
)

var (
Expand Down Expand Up @@ -132,32 +136,78 @@ func closeChannels(channels notificationChannels) {
}
}

func notificationWatcher(notificationType hcsNotification, callbackNumber uintptr, notificationStatus uintptr, notificationData *uint16) uintptr {
var result error
if int32(notificationStatus) < 0 {
result = interop.Win32FromHresult(notificationStatus)
func notificationWatcher(
notificationType hcsNotification,
callbackNumber uintptr,
notificationStatus uintptr,
notificationData *uint16,
) uintptr {
ctx, entry := log.SetEntry(context.Background(), logrus.Fields{
"notification-type": notificationType.String(),
})

result := processNotification(ctx, notificationStatus, notificationData)
if result != nil {
entry.Data[logrus.ErrorKey] = result
}

callbackMapLock.RLock()
context := callbackMap[callbackNumber]
callbackMapLock.RUnlock()

if context == nil {
entry.WithField("context", callbackNumber).Warn("Received notification for unknown context (callback number)")
return 0
}

log := logrus.WithFields(logrus.Fields{
"notification-type": notificationType.String(),
"system-id": context.systemID,
})
entry.Data["system-id"] = context.systemID
if context.processID != 0 {
log.Data[logfields.ProcessID] = context.processID
entry.Data[logfields.ProcessID] = context.processID
}
log.Debug("HCS notification")
entry.Debug("HCS notification")

if channel, ok := context.channels[notificationType]; ok {
channel <- result
}

return 0
}

// processNotification parses and validates HCS notifications and returns the result as an error.
func processNotification(ctx context.Context, notificationStatus uintptr, notificationData *uint16) (err error) {
// TODO: merge/unify with [processHcsResult]
status := int32(notificationStatus)
if status < 0 {
err = interop.Win32FromHresult(notificationStatus)
}

if notificationData == nil {
return err
}

resultJSON := interop.ConvertAndFreeCoTaskMemString(notificationData)
result := &hcsResult{}
if jsonErr := json.Unmarshal([]byte(resultJSON), result); jsonErr != nil {
log.G(ctx).WithFields(logrus.Fields{
logfields.JSON: resultJSON,
logrus.ErrorKey: err,
}).Warn("Could not unmarshal HCS result")
return err
}

// the HResult and data payload should have the same error value
if result.Error < 0 && status != result.Error {
log.G(ctx).WithFields(logrus.Fields{
"status": status,
"data": result.Error,
}).Warn("Mismatched status and data HResult values")
}

if len(result.ErrorEvents) > 0 {
return &resultError{
Err: err,
Events: result.ErrorEvents,
}
}
return err
}
118 changes: 88 additions & 30 deletions internal/hcs/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ import (
"errors"
"fmt"
"net"
"strings"
"syscall"

"github.com/sirupsen/logrus"

"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
)

var (
Expand Down Expand Up @@ -99,48 +103,85 @@ type ErrorEvent struct {
EventID uint16 `json:"EventId,omitempty"`
Flags uint32 `json:"Flags,omitempty"`
Source string `json:"Source,omitempty"`
//Data []EventData `json:"Data,omitempty"` // Omit this as HCS doesn't encode this well. It's more confusing to include. It is however logged in debug mode (see processHcsResult function)
}

type hcsResult struct {
Error int32
ErrorMessage string
ErrorEvents []ErrorEvent `json:"ErrorEvents,omitempty"`
// Omit this as HCS doesn't encode this well. It's more confusing to include.
// It is however logged in during [processHcsResult] errors.
//Data []EventData `json:"Data,omitempty"`
}

func (ev *ErrorEvent) String() string {
evs := "[Event Detail: " + ev.Message
sb := strings.Builder{}
ev.writeTo(&sb)
return sb.String()
}

func (ev *ErrorEvent) writeTo(b *strings.Builder) {
// rough wag at needed length
b.Grow(64 + len(ev.Message) + len(ev.StackTrace) + len(ev.Provider) + len(ev.Source))

// [strings.Builder] Write* functions always return nil errors
_, _ = b.WriteString("[Event Detail: " + ev.Message)
if ev.StackTrace != "" {
evs += " Stack Trace: " + ev.StackTrace
_, _ = b.WriteString(" Stack Trace: " + ev.StackTrace)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have a mechanism to handle the error in the builder method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the error is always nil, so there shouldn't be any error handling needed

}
if ev.Provider != "" {
evs += " Provider: " + ev.Provider
_, _ = b.WriteString(" Provider: " + ev.Provider)
}
if ev.EventID != 0 {
evs = fmt.Sprintf("%s EventID: %d", evs, ev.EventID)
fmt.Fprintf(b, " EventID: %d", ev.EventID)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should perhaps use the same semantics for writing i.e. WriteString.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wed have to call b.WriteString(fmt.Sprintf(" EventID: %d", ev.EventID)) to handle the formatting, which feels more convoluted than Fprintf

}
if ev.Flags != 0 {
evs = fmt.Sprintf("%s flags: %d", evs, ev.Flags)
fmt.Fprintf(b, " flags: %d", ev.Flags)
}
if ev.Source != "" {
evs += " Source: " + ev.Source
_, _ = b.WriteString(" Source: " + ev.Source)
}
evs += "]"
return evs
_ = b.WriteByte(']')

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we using WriteByte instead of WriteString?

}

type hcsResult struct {
Error int32
ErrorMessage string
ErrorEvents []ErrorEvent `json:"ErrorEvents,omitempty"`
// TODO: AttributionRecords
}

func processHcsResult(ctx context.Context, resultJSON string) []ErrorEvent {
if resultJSON != "" {
result := &hcsResult{}
if err := json.Unmarshal([]byte(resultJSON), result); err != nil {
log.G(ctx).WithError(err).Warning("Could not unmarshal HCS result")
log.G(ctx).WithFields(logrus.Fields{
logfields.JSON: resultJSON,
logrus.ErrorKey: err,
}).Warn("Could not unmarshal HCS result")
return nil
}
return result.ErrorEvents
}
return nil
}

// resultError allows passing the events along with the error from [notificationWatcher]
// so they can both be including in the resulting [makeSystemError]/[makeProcessError] call.
//
// errorMessage **should** be the same as err.Error(), so it can safely be ignored.
type resultError struct {
Err error
Events []ErrorEvent
}

func (e *resultError) Error() string {
return appendErrorEvents(e.Err.Error(), e.Events)
}

func (e *resultError) Is(target error) bool {
return errors.Is(e.Err, target)
}

func (e *resultError) Unwrap() error {
return e.Err
}

type HcsError struct {
Op string
Err error
Expand All @@ -150,11 +191,7 @@ type HcsError struct {
var _ net.Error = &HcsError{}

func (e *HcsError) Error() string {
s := e.Op + ": " + e.Err.Error()
for _, ev := range e.Events {
s += "\n" + ev.String()
}
return s
return appendErrorEvents(e.Op+": "+e.Err.Error(), e.Events)
}

func (e *HcsError) Is(target error) bool {
Expand Down Expand Up @@ -194,11 +231,7 @@ type SystemError struct {
var _ net.Error = &SystemError{}

func (e *SystemError) Error() string {
s := e.Op + " " + e.ID + ": " + e.Err.Error()
for _, ev := range e.Events {
s += "\n" + ev.String()
}
return s
return appendErrorEvents(e.Op+" "+e.ID+": "+e.Err.Error(), e.Events)
}

func makeSystemError(system *System, op string, err error, events []ErrorEvent) error {
Expand All @@ -208,6 +241,7 @@ func makeSystemError(system *System, op string, err error, events []ErrorEvent)
return err
}

events, err = getEvents(events, err)
return &SystemError{
ID: system.ID(),
HcsError: HcsError{
Expand All @@ -228,11 +262,7 @@ type ProcessError struct {
var _ net.Error = &ProcessError{}

func (e *ProcessError) Error() string {
s := fmt.Sprintf("%s %s:%d: %s", e.Op, e.SystemID, e.Pid, e.Err.Error())
for _, ev := range e.Events {
s += "\n" + ev.String()
}
return s
return appendErrorEvents(fmt.Sprintf("%s %s:%d: %s", e.Op, e.SystemID, e.Pid, e.Err.Error()), e.Events)
}

func makeProcessError(process *Process, op string, err error, events []ErrorEvent) error {
Expand All @@ -241,6 +271,8 @@ func makeProcessError(process *Process, op string, err error, events []ErrorEven
if errors.As(err, &e) {
return err
}

events, err = getEvents(events, err)
return &ProcessError{
Pid: process.Pid(),
SystemID: process.SystemID(),
Expand All @@ -252,6 +284,32 @@ func makeProcessError(process *Process, op string, err error, events []ErrorEven
}
}

// getEvents checks to see if err has events associated with it, and if so, returns those.
// This is used to flatten an [HceError], [SystemError], or [ProcessError] that wraps a [resultError]
func getEvents(events []ErrorEvent, err error) ([]ErrorEvent, error) {
// only return nested events if the original events is empty.
// don't use [errors.As], since that will unwrap the error chain and drop the parents.
if resErr, ok := err.(*resultError); err != nil && ok && len(events) == 0 { //nolint:errorlint
return resErr.Events, resErr.Err
}
return events, err
}

func appendErrorEvents(s string, events []ErrorEvent) string {
if len(events) == 0 {
return s
}

sb := &strings.Builder{}
_, _ = sb.WriteString(s)
for _, ev := range events {
// don't join with newlines since those are ... awkward within error strings
_, _ = sb.WriteString(": ")
ev.writeTo(sb)
}
return sb.String()
}

// IsNotExist checks if an error is caused by the Container or Process not existing.
// Note: Currently, ErrElementNotFound can mean that a Process has either
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
Expand Down
Loading