This repository was archived by the owner on Apr 19, 2024. It is now read-only.

Commit 5eea10b

Merge pull request #13 from mailgun/thrawn/develop

PIP-574: Support global rate limits

2 parents 15558cd + 009a3e7, commit 5eea10b

23 files changed: +640 −162 lines changed

Dockerfile (-3)

@@ -1,6 +1,3 @@
-# TODO: https://stackoverflow.com/questions/1722807/how-to-convert-git-urls-to-http-urls
-# TODO: https://medium.com/@tonistiigi/build-secrets-and-ssh-forwarding-in-docker-18-09-ae8161d066
-
 # Build image
 FROM golang:1.11.2 as build
 
README.md (+129 −54)

@@ -1,32 +1,37 @@
 # Gubernator
 
-Gubernator is a rate limiting service which calculates rate limits
-via a configurable algorithm.
+Gubernator is a distributed, high performance, cloud native and stateless rate
+limiting service designed to support many different rate limiting scenarios.
+
+#### Scenarios
+* Meter ingress traffic
+* Meter egress traffic
+* Limit bursts on network queues
+* Enforce capacity limits on network services
 
 ## Architecture overview
 
 ![gubernator arch image](/architecture.png)
 
-Gubernator is designed to run as a cluster of peers which utilize an
-in memory cache of all the currently active rate limits, no data is
-ever synced to disk. Since most ingress HTTP rate limit durations are
-held for only a few seconds losing the in memory cache during a reboot
-or scheduled downtime isn't a huge deal. For Gubernator we choose
-performance over accuracy as it's acceptable for a small subset of
-traffic to be allowed to over request for a short period of time.
-Gubernator could be expanded in the future to store rate limits with
-longer durations to disk, but currently this is not supported.
-
-When a rate limit request is made to Gubernator the request is keyed and
-a consistent hashing algorithm is applied to determine which of the
-peers will be the coordinator for the rate limit request. Choosing a
-single coordinator for a rate limit makes atomic increments of counts very fast
-and avoids the complexity and latency involved in distributing counts consistently
-across a cluster of peers. Although simple and performant this design can be
-susceptible to a thundering herd of requests since a single coordinator is responsible
-for possibly hundreds of thousands of requests to a rate limit. To combat this the
-server can take multiple requests within a specified window and batch the requests
-into a single peer request, thus reducing the total number of requests to a single
+Gubernator is designed to run as a distributed cluster of peers which utilize
+an in memory cache of all the currently active rate limits, as such no data is
+ever synced to disk. Since most network based rate limit durations are held for
+only a few seconds losing the in memory cache during a reboot or scheduled
+downtime isn't a huge deal. For Gubernator we choose performance over accuracy
+as it's acceptable for a small subset of traffic to be allowed to over request
+for a short period of time (usually milliseconds) in the case of cache loss.
+
+When a rate limit request is made to Gubernator the request is keyed and a
+consistent hashing algorithm is applied to determine which of the peers will be
+the owner of the rate limit request. Choosing a single owner for a rate limit
+makes atomic increments of counts very fast and avoids the complexity and
+latency involved in distributing counts consistently across a cluster of peers.
+
+Although simple and performant this design can be susceptible to a thundering
+herd of requests since a single coordinator is responsible for possibly
+hundreds of thousands of requests to a rate limit. To combat this peers can
+take multiple requests within a specified window and batch the requests into a
+single peer request, thus reducing the total number of requests to a single
 Gubernator peer tremendously.
 
 To ensure each peer in the cluster accurately calculates the correct hash
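The keyed-request-to-owner flow described in this hunk can be sketched in Go. This is a minimal illustration of hash-based peer selection only: Gubernator's real implementation uses a consistent hash ring (plain modulo is shown here just to convey the idea), and the key format and peer names are assumptions.

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// pickOwner hashes the rate limit key so that every node in the
// cluster independently agrees on which peer owns that key.
// Plain modulo is used purely for illustration; a consistent hash
// minimizes re-mapping when the peer list changes.
func pickOwner(name, uniqueKey string, peers []string) string {
	h := fnv.New32a()
	h.Write([]byte(name + "_" + uniqueKey))
	return peers[int(h.Sum32())%len(peers)]
}

func main() {
	peers := []string{"peer-0:81", "peer-1:81", "peer-2:81"}
	// Every peer computes the same owner for the same key, so counts
	// for one rate limit are always incremented in one place.
	fmt.Println(pickOwner("requests_per_sec", "account_id=123", peers))
}
```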
@@ -38,18 +43,24 @@ simplify deployment.
 
 ## Gubernator Operation
 
-Gubernator does not read from a list of pre-configured rate limits.
-Instead each request to a peer includes the rate limit to be applied to the request.
-This allows clients who understand their rate limit problem space to create and apply
-new rate limit configurations without the need of an out of band process to configure
-the rate limiting service.
+Unlike other generic rate limit service implementations, Gubernator does not have
+the concept of a pre-configured rate limit that clients make requests against.
+Instead each request to the service includes the rate limit config to be
+applied to the request. This allows clients the flexibility to govern their
+rate limit problem domain without the need to coordinate rate limit
+configuration deployments with Gubernator.
 
-The rate limit configuration is stored with the current rate limit in the local cache of
-the coordinator owner. Rate limits and their configuration that are stored in the local
-cache will only exist for the specified duration of the rate limit configuration. After
-the duration time has expired, and if the rate limit was not requested again within the
-duration it is dropped from the cache. Subsequent requests for the same unique_key will
-recreate the config and rate limit in the cache and the cycle will repeat.
+When a client or service makes a request to Gubernator the rate limit config is
+provided with each request by the client. The rate limit configuration is then
+stored with the current rate limit status in the local cache of the rate limit
+owner. Rate limits and their configuration that are stored in the local cache
+will only exist for the specified duration of the rate limit configuration.
+After the duration time has expired, and if the rate limit was not requested
+again within the duration it is dropped from the cache. Subsequent requests for
+the same `name` and `unique_key` pair will recreate the config and rate limit
+in the cache and the cycle will repeat. Subsequent requests with different
+configs will overwrite the previous config and will apply the new config
+immediately.
 
 An example rate limit request sent via GRPC might look like the following
 ```yaml
@@ -58,7 +69,7 @@ rate_limits:
 # other applications that might also use the same unique_key
 name: requests_per_sec
 # A unique_key that identifies this rate limit request
-unique_key: account_id=24b00c590856900ee961b275asdfd|source_ip=172.0.0.1
+unique_key: account_id=123|source_ip=172.0.0.1
 # The number of hits we are requesting
 hits: 1
 # The total number of requests allowed for this rate limit
@@ -69,6 +80,11 @@ rate_limits:
 # 0 = Token Bucket
 # 1 = Leaky Bucket
 algorithm: 0
+# The behavior of the rate limit in gubernator.
+# 0 = BATCHING (Enables batching of requests to peers)
+# 1 = NO_BATCHING (Disables batching)
+# 2 = GLOBAL (Enable global caching for this rate limit)
+behavior: 0
 ```
 
 An example response would be
@@ -88,34 +104,84 @@ rate_limits:
 # Additional metadata about the request the client might find useful
 metadata:
 # This is the name of the coordinator that rate limited this request
-"owner": "api-n03.staging.us-east-1.definbox.com:9041"
+"owner": "api-n03.staging.us-east-1.mailgun.org:9041"
 ```
 
 #### Rate limit Algorithm
 Gubernator currently supports 2 rate limit algorithms.
 
-1. [Token Bucket](https://en.wikipedia.org/wiki/Token_bucket) is useful for rate limiting very
-bursty traffic. The downside to token bucket is that once you have hit the limit no more requests
-are allowed until the configured rate limit duration resets the bucket to zero.
-2. [Leaky Bucket](https://en.wikipedia.org/wiki/Leaky_bucket) is useful for metering traffic
-at a consistent rate, as the bucket leaks at a consistent rate allowing traffic to continue
-without the need to wait for the configured rate limit duration to reset the bucket to zero.
+1. **Token Bucket** implementation starts with an empty bucket, then each `Hit`
+adds a token to the bucket until the bucket is full. Once the bucket is
+full, requests will return `OVER_LIMIT` until the `reset_time` is reached at
+which point the bucket is emptied and requests will return `UNDER_LIMIT`.
+This algorithm is useful for enforcing very bursty limits. (IE: Applications
+where a single request can add more than 1 `hit` to the bucket; or non network
+based queuing systems.) The downside to this implementation is that once you
+have hit the limit no more requests are allowed until the configured rate
+limit duration resets the bucket to zero.
+
+2. [Leaky Bucket](https://en.wikipedia.org/wiki/Leaky_bucket) is implemented
+similarly to **Token Bucket** where `OVER_LIMIT` is returned when the bucket
+is full. However tokens leak from the bucket at a consistent rate which is
+calculated as `duration / limit`. This algorithm is useful for metering, as
+the bucket leaks allowing traffic to continue without the need to wait for
+the configured rate limit duration to reset the bucket to zero.
+
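The leaky bucket's leak rate described above (`duration / limit`) can be checked with a small sketch. The `leaked` helper is hypothetical, written only to illustrate the formula; it is not the repository's code.

```go
package main

import "fmt"

// leaked returns how many tokens drain from a leaky bucket after
// elapsedMs, given a leak rate of one token per (durationMs / limit)
// milliseconds. Hypothetical helper illustrating the formula only.
func leaked(elapsedMs, durationMs, limit int64) int64 {
	return elapsedMs / (durationMs / limit)
}

func main() {
	// limit=10 over duration=60000ms => one token leaks every 6000ms,
	// so after 18 seconds three requests' worth of capacity returns.
	fmt.Println(leaked(18000, 60000, 10))
}
```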
+## Global Limits
+Since Gubernator rate limits are hashed and handled by a single peer in the
+cluster, rate limits that apply to every request in a data center would result
+in the rate limit request being handled by a single peer for the entirety of
+the data center. For example, consider a rate limit with
+`name=requests_per_datacenter` and a `unique_id=us-east-1`. Now imagine that a
+request is made to Gubernator with this rate limit for every http request that
+enters the `us-east-1` data center. This could be hundreds of thousands,
+potentially millions of requests per second that are all hashed and handled by
+a single peer in the cluster. Because of this potential scaling issue
+Gubernator introduces a configurable `behavior` called `GLOBAL`.
+
+When a rate limit is configured with `behavior=GLOBAL` the rate limit request
+that is received from a client will not be forwarded to the owning peer but
+will be answered from an internal cache handled by the peer. `Hits` toward the
+rate limit will be batched by the receiving peer and sent asynchronously to the
+owning peer where the hits will be totaled and `OVER_LIMIT` calculated. It
+is then the responsibility of the owning peer to update each peer in the
+cluster with the current status of the rate limit, such that peer internal
+caches routinely get updated with the most current rate limit status.
+
+##### Side effects of global behavior
+Since `Hits` are batched and forwarded to the owning peer asynchronously, the
+immediate response to the client will not include the most accurate `remaining`
+counts, as that count will only get updated after the async call to the owner
+peer is complete and the owning peer has had time to update all the peers in
+the cluster. As a result the use of `GLOBAL` allows for greater scale but at
+the cost of consistency. Using `GLOBAL` also increases the amount of traffic
+per rate limit request. `GLOBAL` should only be used for extremely high volume
+rate limits that don't scale well with the traditional non `GLOBAL` behavior.
+
+## Performance
+TODO: Show some performance metrics of gubernator running in production
 
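The batch-then-forward idea behind `GLOBAL` can be modeled in a few lines. The `update` type and `batch` function below are illustrative only and are not part of Gubernator's API; they show how many locally answered hits collapse into a single asynchronous report to the owning peer.

```go
package main

import "fmt"

// update models the single asynchronous report the receiving peer
// sends to the owning peer (illustrative type, not Gubernator's API).
type update struct {
	key  string
	hits int64
}

// batch collapses many locally answered hits against one key into a
// single update for the owner, which is the core of GLOBAL behavior:
// the owner sees one report instead of one request per hit.
func batch(key string, hits []int64) update {
	u := update{key: key}
	for _, h := range hits {
		u.hits += h
	}
	return u
}

func main() {
	// Five client requests handled locally become one report.
	u := batch("requests_per_datacenter_us-east-1", []int64{1, 1, 1, 1, 1})
	fmt.Println(u.key, u.hits)
}
```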
 ## API
-All Methods implement in GRPC are exposed to HTTP via the
+All methods are accessed via GRPC but are also exposed via HTTP using the
 [GRPC Gateway](https://github.com/grpc-ecosystem/grpc-gateway)
 
 #### Health Check
 Health check returns `unhealthy` in the event a peer is reported by etcd as `up` but the server
 instance is unable to contact the peer via its advertised address.
 
+###### GRPC
+```grpc
+rpc HealthCheck (HealthCheckReq) returns (HealthCheckResp)
+```
+
+###### HTTP
 ```
 GET /v1/HealthCheck
 ```
 
 Example response:
 
-```javascript
+```json
 {
 "status": "healthy",
 "peer_count": 3
@@ -127,18 +193,24 @@ Rate limits can be applied or retrieved using this interface. If the client
 makes a request to the server with `hits: 0` then the current state of the rate
 limit is retrieved but not incremented.
 
+###### GRPC
+```grpc
+rpc GetRateLimits (GetRateLimitsReq) returns (GetRateLimitsResp)
+```
+
+###### HTTP
 ```
 POST /v1/GetRateLimits
 ```
 
 Example Payload
-```javascript
+```json
 {
-"rate_limits": [
+"requests":[
 {
+"name": "requests_per_sec",
+"unique_key": "account.id=1234",
 "hits": 1,
-"namespace": "my-app",
-"unique_key": "domain.id=1234",
 "duration": 60000,
 "limit": 10
 }
@@ -148,10 +220,11 @@ Example Payload
 
 Example response:
 
-```javascript
+```json
 {
-"rate_limits": [
+"responses":[
 {
+"status": 0,
 "limit": "10",
 "remaining": "7",
 "reset_time": "1551309219226",
@@ -161,6 +234,10 @@ Example response:
 ```
 
 
+## Installation
+TODO: Show how to run gubernator in a docker container with just environs
+
+
 ## Development with Docker Compose
 Gubernator uses etcd to keep track of all its peers. This peer list is
 used by the consistent hash to calculate which peer is the coordinator
@@ -173,13 +250,11 @@ You will need to be on the VPN to pull docker images from the repository.
 # Start the containers
 $ docker-compose up -d
 
-# Run radar to create the configs in etcd (https://github.com/mailgun/radar)
-push_configs --etcd-endpoint localhost:2379 --env-names test,dev
-
 # Run gubernator
-export ETCD3_ENDPOINT=localhost:2379
-export MG_ENV=dev
 $ cd golang
 $ go run ./cmd/gubernator --config config.yaml
 ```
 
+### What kind of name is Gubernator?
+Gubernator is the [english pronunciation of governor](https://www.google.com/search?q=how+to+say+governor+in+russian&oq=how+to+say+govener+in+russ)
+in Russian, also it sounds cool.

docker-compose.yaml (-6)

@@ -14,9 +14,3 @@ services:
       -initial-cluster-state new
     ports:
       - "2379:2379"
-  metrics:
-    image: registry.postgun.com:5000/kubegun/metrics:latest
-    ports:
-      - "8125:8125/udp"
-      - "8081:81"
-      - "8080:80"

go.mod (+1)

@@ -8,6 +8,7 @@ require (
 	github.com/coreos/go-semver v0.3.0 // indirect
 	github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e // indirect
 	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
+	github.com/davecgh/go-spew v1.1.1
 	github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect
 	github.com/fatih/structs v1.1.0 // indirect
 	github.com/ghodss/yaml v1.0.0

golang/algorithms.go (+10 −1)

@@ -99,7 +99,11 @@ func leakyBucket(c cache.Cache, r *RateLimitReq) (*RateLimitResp, error) {
 		b.LimitRemaining = b.Limit
 	}
 
-	b.TimeStamp = now
+	// Only update the TS if client is incrementing the hit
+	if r.Hits != 0 {
+		b.TimeStamp = now
+	}
+
 	rl := &RateLimitResp{
 		Limit:     b.Limit,
 		Remaining: b.LimitRemaining,
@@ -127,6 +131,11 @@ func leakyBucket(c cache.Cache, r *RateLimitReq) (*RateLimitResp, error) {
 		return rl, nil
 	}
 
+	// Client is only interested in retrieving the current status
+	if r.Hits == 0 {
+		return rl, nil
+	}
+
 	b.LimitRemaining -= r.Hits
 	rl.Remaining = b.LimitRemaining
 	c.UpdateExpiration(r.HashKey(), now*r.Duration)
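The intent of this change, `hits: 0` acting as a read-only peek, can be modeled in isolation. The `bucket` type below is a reduced sketch, not the real cache-backed `leakyBucket`: a zero-hit request must neither advance the timestamp nor consume capacity.

```go
package main

import "fmt"

// bucket is a reduced model of the fix above: when hits == 0 the
// client only peeks at the bucket, so neither the timestamp nor the
// remaining count is mutated (illustrative, not the real code).
type bucket struct {
	remaining int64
	timeStamp int64
}

// take applies hits at time now and returns the remaining count.
func (b *bucket) take(hits, now int64) int64 {
	if hits != 0 {
		b.timeStamp = now // only advance the clock on a real hit
		b.remaining -= hits
	}
	return b.remaining
}

func main() {
	b := &bucket{remaining: 10, timeStamp: 100}
	fmt.Println(b.take(0, 200)) // peek: remaining unchanged
	fmt.Println(b.take(1, 300)) // real hit: remaining decremented
}
```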

golang/cache/types.go (+1 −1)

@@ -1,7 +1,7 @@
 package cache
 
 // Interface accepts any cache which returns cache stats
-type CacheStats interface {
+type Stater interface {
 	Stats(bool) Stats
 }
 

golang/cluster/cluster.go (+18 −1)

@@ -7,6 +7,7 @@ import (
 	"github.com/sirupsen/logrus"
 	"google.golang.org/grpc"
 	"net"
+	"time"
 )
 
 type instance struct {
@@ -39,6 +40,16 @@ func GetPeer() string {
 	return gubernator.RandomPeer(peers)
 }
 
+// Returns a specific peer
+func PeerAt(idx int) string {
+	return peers[idx]
+}
+
+// Returns a specific instance
+func InstanceAt(idx int) *instance {
+	return instances[idx]
+}
+
 // Start a local cluster of gubernator servers
 func Start(numInstances int) error {
 	var addresses []string
@@ -53,7 +64,13 @@ func StartWith(addresses []string) error {
 	for _, address := range addresses {
 		srv := grpc.NewServer()
 
-		guber, err := gubernator.New(gubernator.Config{GRPCServer: srv})
+		guber, err := gubernator.New(gubernator.Config{
+			GRPCServer: srv,
+			Behaviors: gubernator.BehaviorConfig{
+				GlobalSyncWait: time.Millisecond * 50, // Suitable for testing but not production
+				GlobalTimeout:  time.Second,
+			},
+		})
 		if err != nil {
 			return errors.Wrap(err, "while creating new gubernator instance")
 		}
