8 changes: 8 additions & 0 deletions llama-dist/src/main/conf/llama-site.xml
100644 → 100755
@@ -472,6 +472,14 @@
before the cache.
</description>
</property>
<property>
<name>llama.am.resource.normalizing.enabled.#QUEUE#</name>
<value>true</value>
<description>
Per-queue setting that indicates whether to break resource requests into smaller requests of a standard size
before they reach the cache.
</description>
</property>
<property>
<name>llama.am.resource.normalizing.standard.mbs</name>
<value>1024</value>
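For illustration, this is how the per-queue override might look for a hypothetical queue named root.default (the #QUEUE# placeholder is replaced with the actual queue name; the queue name and value here are assumptions, not part of this change):

  <property>
    <name>llama.am.resource.normalizing.enabled.root.default</name>
    <value>false</value>
    <description>
      Disables normalization for the root.default queue only; queues without
      a per-queue entry fall back to llama.am.resource.normalizing.enabled.
    </description>
  </property>
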
2 changes: 1 addition & 1 deletion llama/src/main/java/com/cloudera/llama/am/api/LlamaAM.java
100644 → 100755
@@ -95,7 +95,7 @@ public abstract class LlamaAM {
public static final long GANG_ANTI_DEADLOCK_BACKOFF_MAX_DELAY_DEFAULT = 30000;

public static final String CACHING_ENABLED_KEY =
PREFIX_KEY + "caching.enabled";
PREFIX_KEY + "cache.enabled";
Contributor: Thanks for updating this. Can we also update the reference to "caching.enabled" in llama-site.xml as well?

Author: Done

public static final boolean CACHING_ENABLED_DEFAULT = true;

public static final String THROTTLING_ENABLED_KEY =
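Assuming PREFIX_KEY resolves to "llama.am." (consistent with the llama.am.* keys in llama-site.xml above), the renamed property in the site file would read as follows; this is a sketch of the expected entry, not a hunk from this PR:

  <property>
    <name>llama.am.cache.enabled</name>
    <value>true</value>
  </property>
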
2 changes: 2 additions & 0 deletions llama/src/main/java/com/cloudera/llama/am/impl/SingleQueueLlamaAM.java
100644 → 100755
@@ -122,6 +122,8 @@ private RMConnector createRMConnector() {
NORMALIZING_ENABLED_DEFAULT);
caching = getConf().getBoolean(
CACHING_ENABLED_KEY + "." + queue, caching);
normalizing = getConf().getBoolean(
NORMALIZING_ENABLED_KEY + "." + queue, normalizing);
LOG.info("Caching for queue '{}' enabled '{}'", queue,
caching);
if (caching && normalizing) {
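The per-queue lookup above follows a global-then-override pattern: read the global flag, then use it as the default for the per-queue key. A minimal, self-contained sketch of that pattern (class and method names here are hypothetical, not from the Llama source):

import org.apache.hadoop.conf.Configuration;

public class PerQueueFlagSketch {
  static final String NORMALIZING_ENABLED_KEY =
      "llama.am.resource.normalizing.enabled";

  static boolean isNormalizingEnabled(Configuration conf, String queue) {
    // Read the global flag first (defaulting to true, per llama-site.xml).
    boolean normalizing = conf.getBoolean(NORMALIZING_ENABLED_KEY, true);
    // A per-queue key, if present, overrides the global value; otherwise the
    // global value serves as the default, as in SingleQueueLlamaAM above.
    return conf.getBoolean(NORMALIZING_ENABLED_KEY + "." + queue, normalizing);
  }
}
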
52 changes: 51 additions & 1 deletion llama/src/main/java/com/cloudera/llama/am/yarn/YarnRMConnector.java
100644 → 100755
@@ -573,6 +573,11 @@ private void _reserve(Collection<RMResource> resources)
resource.getRmData().put("request", request);

resource.getRmData().put(YARN_RM_CONNECTOR_KEY, this);

// Keep resources that relax locality in a separate map so they can be
// matched against unclaimed containers later.
if (resource.getLocalityAsk() != com.cloudera.llama.am.api.Resource.Locality.MUST) {
  anyLocationResourceIdToRequestMap.put(resource.getResourceId(), request);
}
}
}

@@ -660,6 +665,9 @@ public void emptyCache() throws LlamaException {
ConcurrentHashMap<ContainerId, UUID> containerToResourceMap =
new ConcurrentHashMap<ContainerId, UUID>();

ConcurrentHashMap<UUID, LlamaContainerRequest> anyLocationResourceIdToRequestMap =
new ConcurrentHashMap<UUID, LlamaContainerRequest>();

@Override
public void onContainersCompleted(List<ContainerStatus> containerStatuses) {
List<RMEvent> changes = new ArrayList<RMEvent>();
@@ -772,6 +780,7 @@ private RMEvent createResourceAllocation(RMResource resources,
public void onContainersAllocated(List<Container> containers) {
List<RMEvent> changes = new ArrayList<RMEvent>();
// no need to use a ugi.doAs() as this is called from within Yarn client
List<Container> unclaimedContainers = new ArrayList<Container>();
for (Container container : containers) {
List<? extends Collection<LlamaContainerRequest>> matchingContainerReqs =
amRmClientAsync.getMatchingRequests(container.getPriority(),
@@ -806,10 +815,51 @@ public void onContainersAllocated(List<Container> containers) {
LOG.trace("Reservation resource '{}' removed from YARN", resource);

queue(new ContainerHandler(ugi, resource, container, Action.START));

// Remove the granted request from anyLocationResourceIdToRequestMap if it is present.
anyLocationResourceIdToRequestMap.remove(resource.getResourceId());
}
} else {
LOG.error("No matching request for {}. Releasing the container.",
LOG.debug("No strong request match for {}. Adding to the list of unclaimed containers.",
container);
unclaimedContainers.add(container);
}
}
// Match remaining YARN containers against requests that relaxed locality.
Contributor: In the interest of readability, would it make sense to do this in a separate private method?

Author: After moving the duplicate code into a separate private method, the onContainersAllocated method looks quite concise and readable. Please let me know if you would still like me to move the weak-match logic into a separate method as well.

// Process unclaimed containers in a separate loop so that strong (locality) matches take precedence.
for (Container container : unclaimedContainers) {
// Find a pending request that relaxes locality and can use this unclaimed container.
Contributor: Don't follow the comment fully. Rephrase?

Author: Done

boolean containerIsClaimed = false;
Iterator<Map.Entry<UUID, LlamaContainerRequest>> iterator =
    anyLocationResourceIdToRequestMap.entrySet().iterator();
while (iterator.hasNext()) {
  Map.Entry<UUID, LlamaContainerRequest> entry = iterator.next();
  LlamaContainerRequest request = entry.getValue();
  // Match by capacity (vcores and memory) only.
  if (request.getResourceAsk().getCpuVCoresAsk() == container.getResource().getVirtualCores() &&
      request.getResourceAsk().getMemoryMbsAsk() == container.getResource().getMemory()) {

RMResource resource = request.getResourceAsk();
Contributor: This code is duplicated from the case where there is a strong match. Move it to a private helper method and reuse at both places?

Author: Done


LOG.debug("New allocation for '{}' container '{}', node '{}'",
Contributor: There are three {} for two variables?

Author: Hmm, there are actually 3 variables: resource, container.getId(), container.getNodeId(). Am I missing the point of the comment?

resource, container.getId(), container.getNodeId());

resource.getRmData().put("container", container);
containerToResourceMap.put(container.getId(),
resource.getResourceId());
changes.add(createResourceAllocation(resource, container));
amRmClientAsync.removeContainerRequest(request);
LOG.trace("Reservation resource '{}' removed from YARN", resource);

queue(new ContainerHandler(ugi, resource, container, Action.START));

iterator.remove();
containerIsClaimed = true;
break;
}
}
if (!containerIsClaimed) {
LOG.error("No matching request for {}. Releasing the container.",
container);
containerToResourceMap.remove(container.getId());
amRmClientAsync.releaseAssignedContainer(container.getId());
}
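Putting the hunks together, the allocation callback now makes two passes: a strong, locality-aware match via the AMRM client, then a capacity-only fallback against requests that relaxed locality, releasing anything still unclaimed. Below is a self-contained sketch of that flow using stand-in types; none of these names come from the Llama or YARN APIs:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

class TwoPassMatchSketch {
  // Stand-in types; the real code uses YARN's Container and Llama's
  // LlamaContainerRequest / RMResource.
  record Capacity(int vcores, int memoryMb) {}
  record Request(UUID resourceId, Capacity ask) {}
  record Container(String id, Capacity size) {}

  // Requests that relaxed locality, keyed by resource id (mirrors
  // anyLocationResourceIdToRequestMap in the diff).
  final Map<UUID, Request> relaxed = new ConcurrentHashMap<>();

  void onContainersAllocated(List<Container> containers,
                             Map<Container, Request> strongMatches) {
    List<Container> unclaimed = new ArrayList<>();
    // Pass 1: prefer strong (locality) matches.
    for (Container c : containers) {
      Request strong = strongMatches.get(c);
      if (strong != null) {
        claim(c, strong);
        relaxed.remove(strong.resourceId());
      } else {
        unclaimed.add(c);
      }
    }
    // Pass 2: capacity-only match against relaxed-locality requests.
    for (Container c : unclaimed) {
      boolean claimed = false;
      Iterator<Map.Entry<UUID, Request>> it = relaxed.entrySet().iterator();
      while (it.hasNext()) {
        Request r = it.next().getValue();
        if (r.ask().equals(c.size())) {  // vcores and memory must both match
          claim(c, r);
          it.remove();
          claimed = true;
          break;
        }
      }
      if (!claimed) {
        release(c);  // no request wants this container; give it back to YARN
      }
    }
  }

  void claim(Container c, Request r) { /* start the container for r */ }
  void release(Container c) { /* release the container back to YARN */ }
}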