Skip to content

Commit

Permalink
Merge pull request juju#6905 from jameinel/2.1-find-bridges-correctly
Browse files Browse the repository at this point in the history
Find lxdbr0 even when it doesn't have an address.

When looking on the host machine for how we might want to
configure a container, we actually weren't finding 'lxdbr0'
because it didn't have an address. This changes the
machine.LinkLayerDevicesForSpaces so that it first builds
up devices based on their addresses, and then does a
pass to create the devices based on what they are
connected to.

This properly connects things together for places like
AWS that only have the local bridge.

## QA steps
```
  $ juju bootstrap ?
  $ juju switch controller
  $ juju add-machine lxd:0
  $ juju debug-log ...
```

Without this patch, you'll see lines like:
```
machine-0: 11:44:07 DEBUG juju.network.containerizer for container "0/lxd/0", found desired spaces: <none>
machine-0: 11:44:07 DEBUG juju.network.containerizer container "0/lxd/0" not qualified to a space, host machine "0" is using spaces <none>
machine-0: 11:44:07 DEBUG juju.network.containerizer container has no desired spaces, and host has no known spaces, triggering fallback to bridge
machine-0: 11:44:07 DEBUG juju.network.containerizer FindMissingBridgesForContainer("0/lxd/0") spaces "" devices map{"":["ens4"]}
machine-0: 11:44:07 INFO juju.network bridgescript command=/usr/bin/python2 - --interfaces-to-bridge="ens4" --activate --bridge-prefix=br-  --rec
```

Which indicates it was trying to use the host device (it fails so the later fallback to use lxdbr0 when the one we wanted fails makes it ultimately succeed by accident.)

With this patch, you'll see
```
machine-0: 13:12:15 DEBUG juju.network.containerizer FindMissingBridgesForContainer("0/lxd/2") spaces "" devices map{"":["ens4","lxdbr0"]}
machine-0: 13:12:15 DEBUG juju.provisioner.lxd using multi-bridge networking for container "0/lxd/2"
machine-0: 13:12:15 DEBUG juju.network.containerizer for container "0/lxd/2", found desired spaces: <none>
machine-0: 13:12:15 DEBUG juju.network.containerizer container "0/lxd/2" not qualified to a space, host machine "0" is using spaces <none>
machine-0: 13:12:15 DEBUG juju.network.containerizer container has no desired spaces, and host has no known spaces, triggering fallback to bridge
machine-0: 13:12:15 DEBUG juju.network.containerizer for container "0/lxd/2", found host devices spaces: map{"":["ens4","lxdbr0"]}
machine-0: 13:12:15 DEBUG juju.network.containerizer for container "0/lxd/2" using host machine "0" bridge devices: "lxdbr0"
machine-0: 13:12:15 DEBUG juju.network.containerizer prepared container "0/lxd/2" network config: [{Name:eth0 MTU:1500 ProviderID: Type:ethernet
:true IsUp:true ParentName:m#0#d#lxdbr0}]
```

Which means that we correctly identified that 'lxdbr0' should be used (there should be no call to the bridging script.)

## Documentation changes

This is fixing containers under the covers, so no doc changes.

## Bug reference

This is all part of doing https://bugs.launchpad.net/juju/+bug/1657449 correctly.
It also is a step along the path for https://bugs.launchpad.net/juju/+bug/1656326 because by directly requesting 'lxdbr0' we can remove the code that 'falls back' to lxdbr0.
  • Loading branch information
jujubot authored Feb 2, 2017
2 parents 64be732 + 775f4b9 commit ec0470a
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 35 deletions.
1 change: 1 addition & 0 deletions apiserver/provisioner/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,7 @@ type hostChangesContext struct {
func (ctx *hostChangesContext) ProcessOneContainer(env environs.Environ, idx int, host, container *state.Machine) error {
bridgePolicy := containerizer.BridgePolicy{
NetBondReconfigureDelay: env.Config().NetBondReconfigureDelay(),
UseLocalBridges: !environs.SupportsContainerAddresses(env),
}
bridges, reconfigureDelay, err := bridgePolicy.FindMissingBridgesForContainer(host, container)
if err != nil {
Expand Down
26 changes: 26 additions & 0 deletions network/containerizer/bridgepolicy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,32 @@ func (s *bridgePolicyStateSuite) TestFindMissingBridgesForContainerUseLocalBridg
c.Check(reconfigureDelay, gc.Equals, 0)
}

func (s *bridgePolicyStateSuite) TestFindMissingBridgesForContainerUseLocalBridgesNoAddress(c *gc.C) {
// We should only use 'lxdbr0' instead of bridging the host device.
s.setupTwoSpaces(c)
s.createNICWithIP(c, s.machine, "ens3", "172.12.0.10/24")
err := s.machine.SetLinkLayerDevices(
state.LinkLayerDeviceArgs{
Name: "lxdbr0",
Type: state.BridgeDevice,
ParentName: "",
IsUp: true,
},
)
c.Assert(err, jc.ErrorIsNil)
s.addContainerMachine(c)
bridgePolicy := &containerizer.BridgePolicy{
NetBondReconfigureDelay: 13,
UseLocalBridges: true,
}
// No defined spaces for the container, no *known* spaces for the host
// machine. Triggers the fallback code to have us bridge all devices.
missing, reconfigureDelay, err := bridgePolicy.FindMissingBridgesForContainer(s.machine, s.containerMachine)
c.Assert(err, jc.ErrorIsNil)
c.Check(missing, jc.DeepEquals, []network.DeviceToBridge{})
c.Check(reconfigureDelay, gc.Equals, 0)
}

func (s *bridgePolicyStateSuite) TestFindMissingBridgesForContainerUnknownWithConstraint(c *gc.C) {
// If we have a host machine where we don't understand its spaces, but
// the container requests a specific space, we won't use the unknown
Expand Down
27 changes: 27 additions & 0 deletions state/linklayerdevices_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,33 @@ func (s *linkLayerDevicesStateSuite) TestLinkLayerDevicesForSpacesWithUnknown(c
c.Check(devices[0].Type(), gc.Equals, state.BridgeDevice)
}

func (s *linkLayerDevicesStateSuite) TestLinkLayerDevicesForSpacesWithNoAddress(c *gc.C) {
// We create a record for the 'lxdbr0' bridge, but it doesn't have an
// address yet (which is the case when we first show up on a machine.)
err := s.machine.SetLinkLayerDevices(
state.LinkLayerDeviceArgs{
Name: "lxdbr0",
Type: state.BridgeDevice,
ParentName: "",
IsUp: true,
},
)
c.Assert(err, jc.ErrorIsNil)
// unknown space
s.createNICWithIP(c, s.machine, "ens5", "192.168.10.12/24")
res, err := s.machine.LinkLayerDevicesForSpaces([]string{""})
c.Assert(err, jc.ErrorIsNil)
c.Assert(res, gc.HasLen, 1)
devices, ok := res[""]
c.Assert(ok, jc.IsTrue)
c.Assert(devices, gc.HasLen, 2)
names := make([]string, len(devices))
for i, dev := range devices {
names[i] = dev.Name()
}
c.Check(names, gc.DeepEquals, []string{"ens5", "lxdbr0"})
}

func (s *linkLayerDevicesStateSuite) TestLinkLayerDevicesForSpacesUnknownIgnoresLoopAndIncludesKnownBridges(c *gc.C) {
// TODO(jam): 2016-12-28 arguably we should also be aware of Docker
// devices, possibly the better plan is to look at whether there are
Expand Down
82 changes: 48 additions & 34 deletions state/machine_linklayerdevices.go
Original file line number Diff line number Diff line change
Expand Up @@ -911,10 +911,11 @@ func deviceMapToSortedList(deviceMap map[string]*LinkLayerDevice) []*LinkLayerDe
return result
}

// LinkLayerDevicesForSpaces takes a list of spaces, and returns the devices
// on this machine that are in that space. (We currently only return devices
// that have an address, which are thus targets for being bridged, or are
// themselves a bridge that can be used.)
// LinkLayerDevicesForSpaces takes a list of spaces, and returns the devices on
// this machine that are in that space that we feel would be useful for
// containers to know about. (eg, if there is a host device that has been
// bridged, we return the bridge, rather than the underlying device, but if we
// have only the host device, we return that.)
// Note that devices like 'lxdbr0' that are bridges that might might not be
// externally accessible may be returned if "" is listed as one of the desired
// spaces.
Expand All @@ -923,59 +924,72 @@ func (m *Machine) LinkLayerDevicesForSpaces(spaces []string) (map[string][]*Link
if err != nil {
return nil, errors.Trace(err)
}
devices, err := m.AllLinkLayerDevices()
if err != nil {
return nil, errors.Trace(err)
}
deviceByName := make(map[string]*LinkLayerDevice, len(devices))
for _, dev := range devices {
deviceByName[dev.Name()] = dev
}
requestedSpaces := set.NewStrings(spaces...)
spaceToDevices := make(map[string]map[string]*LinkLayerDevice, 0)
// TODO(jam): 2016-12-08 We look up each subnet one-by-one, and then look
// up each Link-Layer-Device one-by-one, it feels like we should
// 'aggregate all subnet CIDR' and then grab them in one pass, and then
// filter them to find the link layer devices we care about, and ask for
// them in a single pass again.
// Further, we should be tracking this more by Layer 2 connections, rather
// than requiring CIDR. Eventually we want to get rid of host network
// devices from having an IP address at all, so that only the containers
// have Layer 3 addresses.
processedDeviceNames := set.NewStrings()
includeDevice := func(spaceName string, device *LinkLayerDevice) {
spaceInfo, ok := spaceToDevices[spaceName]
if !ok {
spaceInfo = make(map[string]*LinkLayerDevice)
spaceToDevices[spaceName] = spaceInfo
}
spaceInfo[device.Name()] = device
}
// First pass, iterate the addresses, lookup the associated spaces, and
// gather the devices.
for _, addr := range addresses {
subnet, err := addr.Subnet()
spaceName := ""
if err != nil {
if errors.IsNotFound(err) {
// We record all addresses that we find on the
// machine. However, some devices may have IP
// addresses that are not in known subnets or spaces.
// (loopback devices aren't in a space, arbitrary
// application based addresses, etc.)
// The caller can request those devices by asking us for the
// "" space.
if !requestedSpaces.Contains("") {
continue
}
// unknown subnets are considered part of the "unknown" space
spaceName = ""
} else {
// We don't understand the error, so error out for now
return nil, errors.Trace(err)
}
} else {
spaceName = subnet.SpaceName()
}
if !requestedSpaces.Contains(spaceName) {
continue
}
device, err := addr.Device()
if err != nil {
return nil, errors.Trace(err)
device, ok := deviceByName[addr.DeviceName()]
if !ok {
return nil, errors.Errorf("address %v for machine %q refers to a missing device %q",
addr, m.Id(), addr.DeviceName())
}
processedDeviceNames.Add(device.Name())
if device.Type() == LoopbackDevice {
// We skip loopback devices here
continue
}
spaceInfo, ok := spaceToDevices[spaceName]
if !ok {
spaceInfo = make(map[string]*LinkLayerDevice)
spaceToDevices[spaceName] = spaceInfo
includeDevice(spaceName, device)
}
// Now grab any devices we may have missed. For now, any device without an
// address must be in the "unknown" space.
for devName, device := range deviceByName {
if processedDeviceNames.Contains(devName) {
continue
}
spaceInfo[device.Name()] = device
// Loopback devices aren't considered part of the empty space
// Also, devices that are attached to another device also aren't
// considered to be in the unknown space.
if device.Type() == LoopbackDevice || device.ParentName() != "" {
continue
}
includeDevice("", device)
}
result := make(map[string][]*LinkLayerDevice, len(spaceToDevices))
for spaceName, deviceMap := range spaceToDevices {
if !requestedSpaces.Contains(spaceName) {
continue
}
result[spaceName] = deviceMapToSortedList(deviceMap)
}
return result, nil
Expand Down
1 change: 0 additions & 1 deletion worker/machiner/machiner.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ func setMachineAddresses(tag names.MachineTag, m Machine) error {
}

func (mr *Machiner) Handle(_ <-chan struct{}) error {
logger.Infof("got a Machiner.Handle() event")
if err := mr.machine.Refresh(); params.IsCodeNotFoundOrCodeUnauthorized(err) {
// NOTE(axw) we can distinguish between NotFound and CodeUnauthorized,
// so we could call NotifyMachineDead here in case the agent failed to
Expand Down

0 comments on commit ec0470a

Please sign in to comment.