Skip to content

Commit 1d5c9b9

Browse files
pangea-andrestkenany
authored and committed Sep 25, 2024
Sanitize - Implementation and tests
1 parent 0322298 commit 1d5c9b9

File tree

11 files changed

+691
-3
lines changed

11 files changed

+691
-3
lines changed
 

‎.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ pangea_sdk_log.json
1919
download/
2020

2121
*.gz
22+
sanitized.*

‎CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- `attributes` field in `/list-resources` and `/list-subjects` endpoint
13+
- Sanitize service support
1314

1415
## [3.10.0] - 2024-07-19
1516

‎pangea-sdk/.sdk-ci.yml

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ sdk-test-it:
2020
SERVICE_FILE_INTEL_ENV: LVE
2121
SERVICE_USER_INTEL_ENV: LVE
2222
SERVICE_REDACT_ENV: LVE
23+
SERVICE_SANITIZE_ENV: LVE
2324
SERVICE_VAULT_ENV: LVE
2425
before_script:
2526
- echo $ENV
@@ -76,6 +77,9 @@ sdk-test-it:
7677
- CLOUD: [AWS, GCP]
7778
ENV: ${SERVICE_REDACT_ENV}
7879
TEST: redact
80+
- CLOUD: [AWS]
81+
ENV: ${SERVICE_SANITIZE_ENV}
82+
TEST: sanitize
7983
- CLOUD: [AWS, GCP]
8084
ENV: ${SERVICE_VAULT_ENV}
8185
TEST: vault

‎pangea-sdk/v3/pangea/base_service.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func NewBaseService(name string, baseCfg *Config) BaseService {
3030

3131
func (bs *BaseService) PollResultByError(ctx context.Context, e AcceptedError) (*PangeaResponse[any], error) {
3232
if e.RequestID == nil {
33-
return nil, errors.New("Request ID is empty")
33+
return nil, errors.New("request ID is empty")
3434
}
3535

3636
resp, err := bs.PollResultByID(ctx, *e.RequestID, e.ResultField)

‎pangea-sdk/v3/pangea/file_uploader.go

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package pangea
2+
3+
import (
4+
"context"
5+
"crypto/sha256"
6+
"encoding/hex"
7+
"fmt"
8+
"hash/crc32"
9+
"io"
10+
"os"
11+
"strconv"
12+
"strings"
13+
)
14+
15+
type FileUploader struct {
16+
client *Client
17+
}
18+
19+
func NewFileUploader() FileUploader {
20+
cfg := &Config{
21+
QueuedRetryEnabled: false,
22+
}
23+
24+
return FileUploader{
25+
client: NewClient("FileUploader", cfg),
26+
}
27+
}
28+
29+
func (fu *FileUploader) UploadFile(ctx context.Context, url string, tm TransferMethod, fd FileData) error {
30+
if tm == TMmultipart {
31+
return fmt.Errorf("%s is not supported in UploadFile. Use service client instead", tm)
32+
}
33+
34+
fds := FileData{
35+
File: fd.File,
36+
Name: "file",
37+
Details: fd.Details,
38+
}
39+
return fu.client.UploadFile(ctx, url, tm, fds)
40+
}
41+
42+
// UploadFileParams carries the size and checksums that GetUploadFileParams
// computes for a piece of content.
type UploadFileParams struct {
	Size   int    `json:"size,omitempty"`   // number of bytes read from the input
	CRC32C string `json:"crc32c,omitempty"` // CRC-32 (Castagnoli table), hex, left-padded to 8 digits
	SHA256 string `json:"sha256,omitempty"` // SHA-256 digest, hex encoded
}

// GetUploadFileParams reads input from its beginning to EOF and returns its
// size, CRC32C and SHA256.
//
// The reader is rewound to offset 0 before hashing and again afterwards, so
// the caller can send the same reader without seeking it first. An error is
// returned when input is nil, when any seek fails, or when reading fails.
func GetUploadFileParams(input io.ReadSeeker) (*UploadFileParams, error) {
	if input == nil {
		return nil, fmt.Errorf("nil input reader")
	}

	// Hash from the start regardless of where the caller left the offset.
	if _, err := input.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	sha256Hash := sha256.New()
	crcHash := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	// Feed both hashes in a single pass over the content.
	size, err := io.Copy(io.MultiWriter(sha256Hash, crcHash), input)
	if err != nil {
		return nil, err
	}

	// Reset to be sent; a failed rewind would make the later upload start at
	// EOF, so it must be reported.
	if _, err := input.Seek(0, io.SeekStart); err != nil {
		return nil, err
	}

	// Convert the CRC32C value to hexadecimal and pad "0" on the left to make
	// it 8 characters long (a uint32 never needs more than 8 hex digits).
	crcStr := strconv.FormatUint(uint64(crcHash.Sum32()), 16)
	paddedCRCStr := strings.Repeat("0", 8-len(crcStr)) + crcStr

	return &UploadFileParams{
		CRC32C: paddedCRCStr,
		SHA256: hex.EncodeToString(sha256Hash.Sum(nil)),
		Size:   int(size),
	}, nil
}
95+
96+
// GetFileSize returns the size of file in bytes, measured by seeking to its
// end, and then rewinds the file to offset 0 so it is ready to be sent.
//
// Any seek failure is returned, including a failure of the final rewind.
func GetFileSize(file *os.File) (int64, error) {
	// Seeking to the end yields the absolute offset, i.e. the file size.
	size, err := file.Seek(0, io.SeekEnd)
	if err != nil {
		return 0, err
	}

	// Reset to be sent.
	if _, err := file.Seek(0, io.SeekStart); err != nil {
		return 0, err
	}
	return size, nil
}

‎pangea-sdk/v3/pangea/pangea.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ func (c *Client) NewRequestMultipart(method, url string, body any, fd FileData)
527527
}
528528

529529
// Write file
530-
if fw, err = w.CreateFormFile(fd.Name, "filename.exe"); err != nil {
530+
if fw, err = w.CreateFormFile(fd.Name, fd.Name); err != nil {
531531
return nil, err
532532
}
533533
if _, err = io.Copy(fw, fd.File); err != nil {
@@ -571,7 +571,7 @@ func (c *Client) NewRequestForm(method, url string, fd FileData, setHeaders bool
571571

572572
// Write file
573573
var err error
574-
part, err := w.CreateFormFile(fd.Name, "filename.exe")
574+
part, err := w.CreateFormFile(fd.Name, fd.Name)
575575
if err != nil {
576576
return nil, err
577577
}

‎pangea-sdk/v3/service/sanitize/api.go

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
package sanitize
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io"
8+
9+
"github.com/pangeacyber/pangea-go/pangea-sdk/v3/internal/request"
10+
"github.com/pangeacyber/pangea-go/pangea-sdk/v3/pangea"
11+
)
12+
13+
func (e *sanitize) Sanitize(ctx context.Context, input *SanitizeRequest, file io.ReadSeeker) (*pangea.PangeaResponse[SanitizeResult], error) {
14+
if input == nil {
15+
return nil, errors.New("nil input")
16+
}
17+
18+
if input.TransferMethod == pangea.TMpostURL {
19+
params, err := pangea.GetUploadFileParams(file)
20+
if err != nil {
21+
return nil, err
22+
}
23+
input.SHA256 = params.SHA256
24+
input.CRC32C = params.CRC32C
25+
input.Size = pangea.Int(params.Size)
26+
}
27+
28+
name := "file"
29+
if input.TransferMethod == pangea.TMmultipart {
30+
name = "upload"
31+
}
32+
33+
fd := pangea.FileData{
34+
File: file,
35+
Name: name,
36+
}
37+
38+
return request.DoPostWithFile(ctx, e.Client, "v1beta/sanitize", input, &SanitizeResult{}, fd)
39+
}
40+
41+
func (e *sanitize) RequestUploadURL(ctx context.Context, input *SanitizeRequest) (*pangea.PangeaResponse[SanitizeResult], error) {
42+
if input.TransferMethod == pangea.TMmultipart || input.TransferMethod == pangea.TMdestURL || input.TransferMethod == pangea.TMsourceURL {
43+
return nil, fmt.Errorf("transfer method [%s] is not supported in RequestUploadURL. Use Sanitize() method instead.", input.TransferMethod)
44+
}
45+
46+
if input.TransferMethod == pangea.TMpostURL && (input.SHA256 == "" || input.CRC32C == "" || input.Size == nil) {
47+
return nil, errors.New("Need to set SHA256, CRC32C and Size in order to use TMpostURL")
48+
}
49+
50+
return request.GetUploadURL(ctx, e.Client, "v1beta/sanitize", input, &SanitizeResult{})
51+
}
52+
53+
// SanitizeFile is the value of the "file" key of a SanitizeRequest.
type SanitizeFile struct {
	// ScanProvider is sent as "scan_provider" and omitted when empty.
	// Presumably it names the provider used to scan the file; confirm
	// against the service API documentation.
	ScanProvider string `json:"scan_provider,omitempty"`
}
57+
58+
// SanitizeContent is the value of the "content" key of a SanitizeRequest.
//
// Boolean and integer options are pointers so that an unset option is left
// out of the JSON body while an explicit false or 0 is still sent. The
// meaning of each option is defined by the service; the field names suggest
// URL/domain intel lookups, defanging, redaction and removal of attachments
// or interactive content, but confirm against the API documentation.
type SanitizeContent struct {
	URLIntel            *bool  `json:"url_intel,omitempty"`
	URLIntelProvider    string `json:"url_intel_provider,omitempty"`
	DomainIntel         *bool  `json:"domain_intel,omitempty"`
	DomainIntelProvider string `json:"domain_intel_provider,omitempty"`
	Defang              *bool  `json:"defang,omitempty"`
	DefangThreshold     *int   `json:"defang_threshold,omitempty"`
	Redact              *bool  `json:"redact,omitempty"`
	RemoveAttachments   *bool  `json:"remove_attachments,omitempty"`
	RemoveInteractive   *bool  `json:"remove_interactive,omitempty"`
}
70+
71+
// SanitizeShareOutput is the value of the "share_output" key of a
// SanitizeRequest.
type SanitizeShareOutput struct {
	// Enabled is a pointer so an explicit false is sent while an unset
	// value is omitted.
	Enabled *bool `json:"enabled,omitempty"`
	// OutputFolder is sent as "output_folder" and omitted when empty.
	OutputFolder string `json:"output_folder,omitempty"`
}
76+
77+
// SanitizeRequest represents the SanitizeRequest API request model.
78+
type SanitizeRequest struct {
79+
pangea.BaseRequest
80+
pangea.TransferRequest
81+
82+
SourceURL string `json:"source_url,omitempty"`
83+
ShareID string `json:"share_id,omitempty"`
84+
File *SanitizeFile `json:"file,omitempty"`
85+
Content *SanitizeContent `json:"content,omitempty"`
86+
ShareOutput *SanitizeShareOutput `json:"share_output,omitempty"`
87+
Size *int `json:"size,omitempty"`
88+
CRC32C string `json:"crc32c,omitempty"`
89+
SHA256 string `json:"sha256,omitempty"`
90+
UploadedFileName string `json:"uploaded_file_name,omitempty"`
91+
}
92+
93+
// DefangData is decoded from the "defang" key of SanitizeData.
type DefangData struct {
	ExternalURLsCount    int    `json:"external_urls_count"`
	ExternalDomainsCount int    `json:"external_domains_count"`
	DefangedCount        int    `json:"defanged_count"`
	URLIntelSummary      string `json:"url_intel_summary"`
	DomainIntelSummary   string `json:"domain_intel_summary"`
}
101+
102+
// RedactData is decoded from the "redact" key of SanitizeData.
type RedactData struct {
	RedactionCount int `json:"redaction_count"`
	// SummaryCounts maps a string key to a count; the keys are chosen by the
	// service (presumably the names of the redaction rules that matched).
	SummaryCounts map[string]int `json:"summary_counts"`
}
107+
108+
// CDR is decoded from the "cdr" key of SanitizeData and reports how many
// file attachments and interactive contents were removed.
type CDR struct {
	FileAttachmentsRemoved     int `json:"file_attachments_removed"`
	InteractiveContentsRemoved int `json:"interactive_contents_removed"`
}
113+
114+
// SanitizeData represents the SanitizeData PangeaResponseResult.
115+
type SanitizeData struct {
116+
Defang *DefangData `json:"defang,omitempty"`
117+
Redact *RedactData `json:"redact,omitempty"`
118+
MaliciousFile bool `json:"malicious_file"`
119+
CDR *CDR `json:"cdr,omitempty"`
120+
}
121+
122+
// SanitizeResult represents the SanitizeResult PangeaResponseResult.
123+
type SanitizeResult struct {
124+
DestURL *string `json:"dest_url,omitempty"`
125+
DestShareID *string `json:"dest_share_id,omitempty"`
126+
Data SanitizeData `json:"data"`
127+
Parameters map[string]interface{} `json:"parameters,omitempty"`
128+
}

‎pangea-sdk/v3/service/sanitize/integration_test.go

+413
Large diffs are not rendered by default.
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package sanitize
2+
3+
import (
4+
"context"
5+
"io"
6+
7+
"github.com/pangeacyber/pangea-go/pangea-sdk/v3/pangea"
8+
)
9+
10+
type Client interface {
11+
Sanitize(ctx context.Context, input *SanitizeRequest, file io.ReadSeeker) (*pangea.PangeaResponse[SanitizeResult], error)
12+
RequestUploadURL(ctx context.Context, input *SanitizeRequest) (*pangea.PangeaResponse[SanitizeResult], error)
13+
14+
// Base service methods
15+
pangea.BaseServicer
16+
}
17+
18+
type sanitize struct {
19+
pangea.BaseService
20+
}
21+
22+
func New(cfg *pangea.Config) Client {
23+
cli := &sanitize{
24+
BaseService: pangea.NewBaseService("sanitize", cfg),
25+
}
26+
return cli
27+
}
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)
Please sign in to comment.