5 changes: 4 additions & 1 deletion api/api.go
@@ -10,6 +10,7 @@ import (

"github.com/ONSdigital/dp-authorisation/auth"
"github.com/ONSdigital/dp-dataset-api/application"
"github.com/ONSdigital/dp-dataset-api/cloudflare"
"github.com/ONSdigital/dp-dataset-api/config"
"github.com/ONSdigital/dp-dataset-api/dimension"
"github.com/ONSdigital/dp-dataset-api/instance"
@@ -77,10 +78,11 @@ type DatasetAPI struct {
smDatasetAPI *application.StateMachineDatasetAPI
filesAPIClient filesAPISDK.Clienter
authToken string
cloudflareClient cloudflare.Clienter
}

// Setup creates a new Dataset API instance and register the API routes based on the application configuration.
func Setup(ctx context.Context, cfg *config.Configuration, router *mux.Router, dataStore store.DataStore, urlBuilder *url.Builder, downloadGenerators map[models.DatasetType]DownloadsGenerator, datasetPermissions, permissions AuthHandler, enableURLRewriting bool, smDatasetAPI *application.StateMachineDatasetAPI) *DatasetAPI {
func Setup(ctx context.Context, cfg *config.Configuration, router *mux.Router, dataStore store.DataStore, urlBuilder *url.Builder, downloadGenerators map[models.DatasetType]DownloadsGenerator, datasetPermissions, permissions AuthHandler, enableURLRewriting bool, smDatasetAPI *application.StateMachineDatasetAPI, cloudflareClient cloudflare.Clienter) *DatasetAPI {
api := &DatasetAPI{
dataStore: dataStore,
host: cfg.DatasetAPIURL,
@@ -100,6 +102,7 @@ func Setup(ctx context.Context, cfg *config.Configuration, router *mux.Router, d
MaxRequestOptions: cfg.MaxRequestOptions,
defaultLimit: cfg.DefaultLimit,
smDatasetAPI: smDatasetAPI,
cloudflareClient: cloudflareClient,
}

paginator := pagination.NewPaginator(cfg.DefaultLimit, cfg.DefaultOffset, cfg.DefaultMaxLimit)
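
The service wiring that constructs the client and passes it into Setup is not part of this diff; a sketch based on the Setup signature above and the cloudflare.New constructor introduced later in this PR (where exactly this lives in the service startup is an assumption):

cloudflareClient, err := cloudflare.New(cfg.CloudflareAPIToken, cfg.CloudflareZoneID, cfg.EnableCloudflareSDK, cfg.CloudflareAPIURL)
if err != nil {
	// fail service startup rather than continue without cache purging
	return nil, fmt.Errorf("failed to create cloudflare client: %w", err)
}
datasetAPI := api.Setup(ctx, cfg, router, dataStore, urlBuilder, downloadGenerators, datasetPermissions, permissions, cfg.EnableURLRewriting, smDatasetAPI, cloudflareClient)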
4 changes: 2 additions & 2 deletions api/dataset_test.go
@@ -161,7 +161,7 @@ func GetAPIWithCMDMocks(mockedDataStore store.Storer, mockedGeneratedDownloads D
StateMachine: application.NewStateMachine(testContext, states, transitions, store.DataStore{Backend: mockedDataStore}),
}

return Setup(testContext, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapGeneratedDownloads, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI)
return Setup(testContext, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapGeneratedDownloads, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI, nil)
}

// GetAPIWithCMDMocks also used in other tests, so exported
@@ -217,7 +217,7 @@ func GetAPIWithCantabularMocks(mockedDataStore store.Storer, mockedGeneratedDown
StateMachine: application.NewStateMachine(testContext, states, transitions, store.DataStore{Backend: mockedDataStore}),
}

return Setup(testContext, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapGeneratedDownloads, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI)
return Setup(testContext, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapGeneratedDownloads, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI, nil)
}

func createRequestWithAuth(method, target string, body io.Reader) *http.Request {
14 changes: 11 additions & 3 deletions api/versions.go
@@ -830,19 +830,27 @@ func (api *DatasetAPI) putState(w http.ResponseWriter, r *http.Request) {
Type: models.Static.String(),
}

updatedVersion, err := api.smDatasetAPI.AmendVersion(r.Context(), vars, versionUpdate)
_, err = api.smDatasetAPI.AmendVersion(r.Context(), vars, versionUpdate)
if err != nil {
handleVersionAPIErr(ctx, err, w, logData)
return
}

if stateUpdate.State == models.PublishedState && updatedVersion.Distributions != nil && len(*updatedVersion.Distributions) > 0 {
err = api.publishDistributionFiles(ctx, updatedVersion, logData)
if stateUpdate.State == models.PublishedState && currentVersion.Distributions != nil && len(*currentVersion.Distributions) > 0 {
err = api.publishDistributionFiles(ctx, currentVersion, logData)
Comment on lines +833 to +840 (Contributor):
I'm assuming the change to _ instead of updatedVersion is due to the linter, which looks OK.
However, currentVersion is being used on lines 839 and 840 where it was previously updatedVersion. I think it should remain updatedVersion unless there's an issue I'm not seeing here?

if err != nil {
log.Error(ctx, "putState endpoint: failed to publish distribution files", err, logData)
handleVersionAPIErr(ctx, err, w, logData)
return
}

if api.cloudflareClient != nil {
if err := api.cloudflareClient.PurgeCacheByPrefix(ctx, datasetID, edition); err != nil {
log.Error(ctx, "failed to purge cloudflare cache", err, logData)
} else {
log.Info(ctx, "successfully triggered cloudflare cache purge", logData)
}
}
Comment on lines +847 to +853 (Contributor):
If the client is nil here then there is no trace and the cache will not be cleared, which is a big problem for support. Ideally the client should be confirmed as not nil at setup so we don't need the nil check here.
If possible, it would be nice to have the list of URLs purged in the log.Info so we can check them in environment testing.
I see that we only log an error when the purge fails, as we don't want to block publishing. It might be worth asking Fran if this is OK, since the purge can fail but the dataset will still be published.
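
A sketch of one way to address this, assuming buildPrefixes moves into the dataset-api (see the review comment on cloudflare/client.go below) and that Setup rejects a nil client so the nil check is no longer needed; PurgeCachePrefixes and the handler-level buildPrefixes are illustrative names, not existing APIs:

prefixes := buildPrefixes(datasetID, edition) // hypothetical helper owned by the dataset-api
logData["purged_prefixes"] = prefixes
if err := api.cloudflareClient.PurgeCachePrefixes(ctx, prefixes); err != nil {
	// purge failures are logged but deliberately do not block publishing
	log.Error(ctx, "failed to purge cloudflare cache", err, logData)
} else {
	log.Info(ctx, "successfully triggered cloudflare cache purge", logData)
}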

}

setJSONContentType(w)
2 changes: 1 addition & 1 deletion api/webendpoints_test.go
@@ -368,5 +368,5 @@ func GetWebAPIWithMocks(ctx context.Context, mockedDataStore store.Storer, mocke
cfg.DatasetAPIURL = host
cfg.EnablePrivateEndpoints = false

return Setup(ctx, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapDownloadGenerators, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI)
return Setup(ctx, cfg, mux.NewRouter(), store.DataStore{Backend: mockedDataStore}, urlBuilder, mockedMapDownloadGenerators, datasetPermissions, permissions, enableURLRewriting, &mockStatemachineDatasetAPI, nil)
}
2 changes: 1 addition & 1 deletion ci/build.yml
@@ -5,7 +5,7 @@ image_resource:
type: docker-image
source:
repository: golang
tag: 1.24.6-bullseye
tag: 1.24.11-bookworm

inputs:
- name: dp-dataset-api
4 changes: 2 additions & 2 deletions ci/lint.yml
@@ -5,7 +5,7 @@ image_resource:
type: docker-image
source:
repository: onsdigital/dp-concourse-tools-lint-go
tag: 1.24.6-bullseye-golangci-lint-2
tag: 1.24.11-bookworm-golangci-lint-2

inputs:
- name: dp-dataset-api
@@ -14,4 +14,4 @@ run:
path: dp-dataset-api/ci/scripts/lint.sh

caches:
- path: /go
- path: go/
13 changes: 9 additions & 4 deletions ci/unit.yml
@@ -4,14 +4,19 @@ platform: linux
image_resource:
type: docker-image
source:
repository: golang
tag: 1.24.6-bullseye
repository: onsdigital/dp-concourse-tools-docker-go
tag: 1.24.11-dind-24

inputs:
- name: dp-dataset-api

run:
path: dp-dataset-api/ci/scripts/unit.sh
path: bash
args:
- -exc
- |
/start_docker.sh
dp-dataset-api/ci/scripts/unit.sh
caches:
- path: /go
- path: go/
140 changes: 140 additions & 0 deletions cloudflare/client.go
@@ -0,0 +1,140 @@
package cloudflare

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"

"github.com/ONSdigital/log.go/v2/log"
"github.com/cloudflare/cloudflare-go"
)

// Clienter defines the interface for Cloudflare cache operations
type Clienter interface {
PurgeCacheByPrefix(ctx context.Context, datasetID, editionID string) error
}

// Client wraps the Cloudflare API client
type Client struct {
api *cloudflare.API
zoneID string
baseURL string
httpClient *http.Client
apiToken string
}

// New creates a new Cloudflare client
func New(apiToken, zoneID string, useSDK bool, baseURL ...string) (*Client, error) {
if apiToken == "" {
return nil, fmt.Errorf("cloudflare API token is required")
}
if zoneID == "" {
return nil, fmt.Errorf("cloudflare zone ID is required")
}

// if SDK disabled for local testing with the cloudflare stub, use the HTTP client
if !useSDK && len(baseURL) > 0 && baseURL[0] != "" {
return &Client{
api: nil,
zoneID: zoneID,
baseURL: baseURL[0],
apiToken: apiToken,
httpClient: &http.Client{Timeout: 10 * time.Second},
}, nil
}

// use real Cloudflare SDK
api, err := cloudflare.NewWithAPIToken(apiToken)
if err != nil {
return nil, fmt.Errorf("failed to create cloudflare client: %w", err)
}

return &Client{
api: api,
zoneID: zoneID,
}, nil
}

// PurgeCacheByPrefix purges the Cloudflare cache for dataset-related URLs
func (c *Client) PurgeCacheByPrefix(ctx context.Context, datasetID, editionID string) error {
Comment on lines +62 to +63 (Contributor):
I think this function shouldn't be in the SDK as it is specific to the dataset-api. The SDK should just accept a list of prefixes which it then purges, so any service can use the function.
The dataset-api should be responsible for building the prefixes, which would then be passed to the SDK.
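
A minimal sketch of the service-agnostic shape described above (PurgeCachePrefixes is an illustrative name, not an existing method; purgeCacheViaHTTP and purgeCacheViaSDK are the helpers defined further down in this file):

// PurgeCachePrefixes purges the given prefixes without any dataset-specific logic
func (c *Client) PurgeCachePrefixes(ctx context.Context, prefixes []string) error {
	if len(prefixes) == 0 {
		return nil // nothing to purge
	}
	log.Info(ctx, "purging cloudflare cache", log.Data{"prefixes": prefixes})
	if c.baseURL != "" {
		return c.purgeCacheViaHTTP(ctx, prefixes) // local stub
	}
	return c.purgeCacheViaSDK(ctx, prefixes) // real Cloudflare SDK
}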

prefixes := buildPrefixes(datasetID, editionID)

logData := log.Data{
"dataset_id": datasetID,
"edition": editionID,
"prefixes": prefixes,
}
log.Info(ctx, "purging cloudflare cache", logData)

// local mock
if c.baseURL != "" {
return c.purgeCacheViaHTTP(ctx, prefixes)
}

// for sdk
return c.purgeCacheViaSDK(ctx, prefixes)
}

// purgeCacheViaSDK uses the cloudflare sdk
func (c *Client) purgeCacheViaSDK(ctx context.Context, prefixes []string) error {
params := cloudflare.PurgeCacheRequest{
Prefixes: prefixes,
}

_, err := c.api.PurgeCache(ctx, c.zoneID, params)
if err != nil {
return fmt.Errorf("failed to purge cache: %w", err)
}

return nil
}

// purgeCacheViaHTTP makes direct HTTP call for local cloudflare stub
func (c *Client) purgeCacheViaHTTP(ctx context.Context, prefixes []string) error {
url := fmt.Sprintf("%s/zones/%s/purge_cache", c.baseURL, c.zoneID)

payload := map[string]interface{}{
"prefixes": prefixes,
}

body, err := json.Marshal(payload)
if err != nil {
return fmt.Errorf("failed to marshal request: %w", err)
}

req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewBuffer(body))
if err != nil {
return fmt.Errorf("failed to create request: %w", err)
}

req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", c.apiToken))

resp, err := c.httpClient.Do(req)
if err != nil {
return fmt.Errorf("failed to make request: %w", err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
return fmt.Errorf("purge failed with status %d: %s", resp.StatusCode, string(bodyBytes))
}

return nil
}

func buildPrefixes(datasetID, editionID string) []string {
return []string{
fmt.Sprintf("www.ons.gov.uk/datasets/%s", datasetID),
fmt.Sprintf("www.ons.gov.uk/datasets/%s/editions", datasetID),
fmt.Sprintf("www.ons.gov.uk/datasets/%s/editions/%s/versions", datasetID, editionID),
fmt.Sprintf("api.beta.ons.gov.uk/v1/datasets/%s", datasetID),
fmt.Sprintf("api.beta.ons.gov.uk/v1/datasets/%s/editions", datasetID),
fmt.Sprintf("api.beta.ons.gov.uk/v1/datasets/%s/editions/%s/versions", datasetID, editionID),
}
Comment on lines +131 to +139 (Contributor):
Similar to the previous comment, this function should be in the dataset-api and not the SDK package. I would also avoid hard-coded domain names and instead build them from config values.

}
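
A hedged sketch of the dataset-api-side helper the comment describes, with hosts supplied from configuration rather than hard-coded (the parameter names, and whichever config fields would back them, are assumptions; the prefix format mirrors the existing buildPrefixes):

// buildPurgePrefixes builds Cloudflare purge prefixes from configured hosts,
// e.g. websiteHost "www.ons.gov.uk" and apiHost "api.beta.ons.gov.uk/v1".
func buildPurgePrefixes(websiteHost, apiHost, datasetID, editionID string) []string {
	return []string{
		fmt.Sprintf("%s/datasets/%s", websiteHost, datasetID),
		fmt.Sprintf("%s/datasets/%s/editions", websiteHost, datasetID),
		fmt.Sprintf("%s/datasets/%s/editions/%s/versions", websiteHost, datasetID, editionID),
		fmt.Sprintf("%s/datasets/%s", apiHost, datasetID),
		fmt.Sprintf("%s/datasets/%s/editions", apiHost, datasetID),
		fmt.Sprintf("%s/datasets/%s/editions/%s/versions", apiHost, datasetID, editionID),
	}
}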
84 changes: 84 additions & 0 deletions cloudflare/client_test.go
@@ -0,0 +1,84 @@
package cloudflare_test

import (
"testing"

"github.com/ONSdigital/dp-dataset-api/cloudflare"
. "github.com/smartystreets/goconvey/convey"
)

func TestNew(t *testing.T) {
Convey("Given valid API token and zone ID", t, func() {
apiToken := "test-token"
zoneID := "test-zone-id"
useSDK := false

Convey("When creating a new Cloudflare client", func() {
client, err := cloudflare.New(apiToken, zoneID, useSDK)

Convey("Then no error is returned", func() {
So(err, ShouldBeNil)
})

Convey("And the client is not nil", func() {
So(client, ShouldNotBeNil)
})
})
})

Convey("Given an empty API token", t, func() {
apiToken := ""
zoneID := "test-zone-id"
useSDK := false

Convey("When creating a new Cloudflare client", func() {
client, err := cloudflare.New(apiToken, zoneID, useSDK)

Convey("Then an error is returned", func() {
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "API token is required")
})

Convey("And the client is nil", func() {
So(client, ShouldBeNil)
})
})
})

Convey("Given an empty zone ID", t, func() {
apiToken := "test-token"
zoneID := ""
useSDK := false

Convey("When creating a new Cloudflare client", func() {
client, err := cloudflare.New(apiToken, zoneID, useSDK)

Convey("Then an error is returned", func() {
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "zone ID is required")
})

Convey("And the client is nil", func() {
So(client, ShouldBeNil)
})
})
})

Convey("Given both empty API token and zone ID", t, func() {
apiToken := ""
zoneID := ""
useSDK := false

Convey("When creating a new Cloudflare client", func() {
client, err := cloudflare.New(apiToken, zoneID, useSDK)

Convey("Then an error is returned", func() {
So(err, ShouldNotBeNil)
})

Convey("And the client is nil", func() {
So(client, ShouldBeNil)
})
})
})
}
11 changes: 10 additions & 1 deletion config/config.go
@@ -47,6 +47,7 @@ type Configuration struct {
EnablePermissionsAuth bool `envconfig:"ENABLE_PERMISSIONS_AUTH"`
EnableObservationEndpoint bool `envconfig:"ENABLE_OBSERVATION_ENDPOINT"`
EnableURLRewriting bool `envconfig:"ENABLE_URL_REWRITING"`
EnableCloudflareSDK bool `envconfig:"ENABLE_CLOUDFLARE_SDK"`
DisableGraphDBDependency bool `envconfig:"DISABLE_GRAPH_DB_DEPENDENCY"`
KafkaVersion string `envconfig:"KAFKA_VERSION"`
DefaultMaxLimit int `envconfig:"DEFAULT_MAXIMUM_LIMIT"`
@@ -59,6 +60,9 @@ type Configuration struct {
OTServiceName string `envconfig:"OTEL_SERVICE_NAME"`
OTBatchTimeout time.Duration `envconfig:"OTEL_BATCH_TIMEOUT"`
OtelEnabled bool `envconfig:"OTEL_ENABLED"`
CloudflareZoneID string `envconfig:"CLOUDFLARE_ZONE_ID"`
CloudflareAPIToken string `envconfig:"CLOUDFLARE_API_TOKEN" json:"-"`
CloudflareAPIURL string `envconfig:"CLOUDFLARE_API_URL"`
MongoConfig
}

@@ -109,12 +113,17 @@ func Get() (*Configuration, error) {
EnablePermissionsAuth: false,
EnableObservationEndpoint: true,
EnableURLRewriting: false,
EnableCloudflareSDK: false,
DisableGraphDBDependency: false,
KafkaVersion: "1.0.2",
DefaultMaxLimit: 1000,
DefaultLimit: 20,
DefaultOffset: 0,
MaxRequestOptions: 100, // Maximum number of options acceptable in an incoming Patch request. Compromise between one option per call (inefficient) and an order of 100k options per call, for census data (memory and computationally expensive)
MaxRequestOptions: 100, // Maximum number of options acceptable in an incoming Patch request. Compromise between one option per call (inefficient) and an order of 100k options per call, for census data (memory and computationally expensive)
CloudflareZoneID: "9f1eec58caedd8e902395a065c120073", // this is not a real zone id, purely for local testing purposes as any 32 character length alphanumeric string will work
CloudflareAPIToken: "test-token",
CloudflareAPIURL: "http://cloud-flare-stub:22500",

MongoConfig: MongoConfig{
MongoDriverConfig: mongodriver.MongoDriverConfig{
ClusterEndpoint: "localhost:27017",