From e1e3a06dbcc8450a6da3b4bca1f4a0ae88845109 Mon Sep 17 00:00:00 2001 From: vishaanS <127869399+vishaanS@users.noreply.github.com> Date: Mon, 18 Dec 2023 13:49:21 -0500 Subject: [PATCH 1/4] Production changes --- logging/bin/getlogs.py | 371 +++++++++++++++++++++-------------------- 1 file changed, 186 insertions(+), 185 deletions(-) diff --git a/logging/bin/getlogs.py b/logging/bin/getlogs.py index e9a84d19..3c17cb2e 100644 --- a/logging/bin/getlogs.py +++ b/logging/bin/getlogs.py @@ -13,44 +13,44 @@ from subprocess import run ##v 0.2.0 -def validate_input(dict): +def validate_input(checkInput): """ Validate the arguments passed by the user to ensure script will function properly""" ##Set maximum log limit for output MAX_ROWS = 10000 - if dict['maxInt'] > MAX_ROWS: + if checkInput['maxInt'] > MAX_ROWS: print("Error: Maxrows limit of 10000 exceeded.") sys.exit() #Check whether user provided kubeconfig for port-forwarding - if dict['portforward']: + if checkInput['portforward']: if os.environ.get('KUBECONFIG') is None: print("Error: Port forwarding argument selected but no KUBECONFIG env variable set.") sys.exit() else: ##Set default values - dict['host'] = 'port forwarded' - dict['port'] = 'port forwarded' + checkInput['host'] = 'port forwarded' + checkInput['port'] = 'port forwarded' ##Check for existence of Connection Settings in input dictionary - if(not dict['userName'] or not dict['password'] or not dict['host'] or not dict['port']): + if(not checkInput['userName'] or not checkInput['password'] or not checkInput['host'] or not checkInput['port']): print('\nError: Missing required connection settings. Please specify username, password, host, and port. 
\nDefault values can be manually exported as environment variables ESHOST, ESPORT, ESUSER, ESPASSWD \nTo port-forward and skip ESHOST and ESPORT, use -pf') - print("Username:", dict['userName'], " Password:", dict['password'], " Host:", dict['host'], " Port:", dict['port']) + print("Username:", checkInput['userName'], " Password:", checkInput['password'], " Host:", checkInput['host'], " Port:", checkInput['port']) sys.exit() - if dict['out-filename']: ##Check for supported file-types and existence of file + if checkInput['out-filename']: ##Check for supported file-types and existence of file - if(type(dict['out-filename']) == list): - dict['out-filename']= " ".join(dict['out-filename']) + if(type(checkInput['out-filename']) == list): + checkInput['out-filename']= " ".join(checkInput['out-filename']) - dict['out-filename'] = dict['out-filename'] + "." + dict['format'] ## Add format to file + checkInput['out-filename'] = checkInput['out-filename'] + "." + checkInput['format'] ## Add format to file - if os.path.isfile(dict['out-filename']): ##Check if file already exists - if (dict['force'] == False): + if os.path.isfile(checkInput['out-filename']): ##Check if file already exists + if (checkInput['force'] == False): print("\nUser specified output file already exists. Use -f to overwrite the file.\n") sys.exit() safe_dir = os.getcwd() ## Check for path traversal attack - if os.path.commonprefix((os.path.realpath(dict['out-filename']),safe_dir)) != safe_dir: + if os.path.commonprefix((os.path.realpath(checkInput['out-filename']),safe_dir)) != safe_dir: print("Error: Path traversal in out-filename not allowed.") sys.exit() @@ -61,52 +61,49 @@ def validate_input(dict): print("Error: Output file path not found. Please verify output file path. 
") sys.exit() - if dict['savequery']: ##Find saved query path location - if(type(dict['savequery']) == list): - dict['savequery']= " ".join(dict['savequery']) - if (dict['savequery'].find('.') == -1): - dict['savequery'] = dict['savequery'] + ".json" - if ((not ".json" in dict['savequery'])): + if checkInput['savequery']: ##Find saved query path location + if(type(checkInput['savequery']) == list): + checkInput['savequery']= " ".join(checkInput['savequery']) + if (checkInput['savequery'].find('.') == -1): + checkInput['savequery'] = checkInput['savequery'] + ".json" + if ((not ".json" in checkInput['savequery'])): print('Error: Not a supported filetype for the saved query file. Supported type is .json') sys.exit() - if dict['query-filename']: ##Use for plugging in queries + if checkInput['query-filename']: ##Use for plugging in queries safe_dir = os.getcwd() ## Check for path traversal attack - if os.path.commonprefix((os.path.realpath(dict['query-filename']),safe_dir)) != safe_dir: + if os.path.commonprefix((os.path.realpath(checkInput['query-filename']),safe_dir)) != safe_dir: print("Error: Path traversal in query-filename not allowed.") sys.exit() - if (not os.path.isfile(dict['query-filename'])): + if (not os.path.isfile(checkInput['query-filename'])): print("Error: Invalid query file path.") sys.exit() ##Time Validator - Verifies input, and converts it to UTC - if (type(dict['dateTimeStart']) ==list): - dict['dateTimeStart'] = " ".join(dict['dateTimeStart']) - if (type(dict['dateTimeEnd']) == list): - dict['dateTimeEnd'] = " ".join(dict['dateTimeEnd']) + if (type(checkInput['dateTimeStart']) ==list): + checkInput['dateTimeStart'] = " ".join(checkInput['dateTimeStart']) + if (type(checkInput['dateTimeEnd']) == list): + checkInput['dateTimeEnd'] = " ".join(checkInput['dateTimeEnd']) try: - dict['dateTimeStart'] = (time.strptime(dict.get('dateTimeStart'), "%Y-%m-%d %H:%M:%S")) - dict['dateTimeEnd'] = (time.strptime(dict.get('dateTimeEnd'), "%Y-%m-%d %H:%M:%S")) - 
if (calendar.timegm(dict['dateTimeStart']) > calendar.timegm(dict['dateTimeEnd'])): + checkInput['dateTimeStart'] = (time.strptime(checkInput.get('dateTimeStart'), "%Y-%m-%d %H:%M:%S")) + checkInput['dateTimeEnd'] = (time.strptime(checkInput.get('dateTimeEnd'), "%Y-%m-%d %H:%M:%S")) + if (calendar.timegm(checkInput['dateTimeStart']) > calendar.timegm(checkInput['dateTimeEnd'])): print("Given start date is after the end date.") sys.exit() else: - dict['dateTimeStart'] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.mktime(dict['dateTimeStart']))) - dict['dateTimeEnd'] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.mktime(dict['dateTimeEnd']))) + checkInput['dateTimeStart'] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.mktime(checkInput['dateTimeStart']))) + checkInput['dateTimeEnd'] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.mktime(checkInput['dateTimeEnd']))) except ValueError: print("One or more date(s) have been formatted incorrectly. Correct format is Y-M-D H:M:S. 
Ex: 1999-02-16 10:00:00") sys.exit() - if (dict['message']): ## Argument formatting for query builder - if(type(dict['message']) == list): - dict['message']= " ".join(dict['message']) - if (dict['message'].find("'") > -1): ##Check for invalid single quotes - print("Please remove single quotes ('') from search argument.") - sys.exit() + if (checkInput['message']): ## Argument formatting for query builder + checkInput['message'] = checkInput['message'].replace('"',"'") -def open_port(dict): - """Binds the v4m-search service on port 9200 to a locally available port by accessing the namespace of running Opensearch instance""" +def open_port(): + """Binds the v4m-search service on port 9200 to a locally available port by accessing the namespace of running OpenSearch instance""" + """Modifies host and port args""" #Get open port s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.bind(("",0)) @@ -119,69 +116,67 @@ def open_port(dict): port_namespace = result.stdout.replace("'", "") if (not port_namespace): - print("Error: The V4M opensearch service is not currently running on this cluster. Port forwarding failed.") + print("Error: The V4M OpenSearch service is not currently running on this cluster. 
Port forwarding failed.") sys.exit() cmd = (["kubectl", "-n", port_namespace, "port-forward", "svc/v4m-search", str(port) + ':9200', '&']) full_command = " ".join(cmd) proc = subprocess.Popen(full_command, shell=True, stdout = subprocess.DEVNULL, stderr = subprocess.DEVNULL) - dict['host'] = 'localhost' - dict['port'] = port time.sleep(5) - return dict + return port -def build_query(dict): +def build_query(args): """Generates Query using Opensearch DSL""" - """Takes arguments from user and builds a query to pass to opensearch API""" + """Takes arguments from user and returns a JSON-format query to pass to OpenSearch API""" first = True argcounter=0 ##Counts unique options entered by user, sets min_match to this number - if (not dict['query-filename']): + if (not args['query-filename']): tfile = tempfile.NamedTemporaryFile(delete = False) ##If User has not specified query file, create temp file for one. temp = open(tfile.name, 'w') - temp.write('{"size": ' + str(dict['maxInt']) + ',"sort": [{"@timestamp": {"order": "desc","unmapped_type": "boolean"} }]') ## Establish size of query, remove scoring - temp.write(', "query": {"bool": {"must":[ {"range": {"@timestamp": {"gte": "' + dict['dateTimeStart'] + '","lt": "' + dict['dateTimeEnd'] + '"} } }], ') ##Establish Query with Time Range Requirements + temp.write('{"size": ' + str(args['maxInt']) + ',"sort": [{"@timestamp": {"order": "desc","unmapped_type": "boolean"} }]') ## Establish size of query, remove scoring + temp.write(', "query": {"bool": {"must":[ {"range": {"@timestamp": {"gte": "' + args['dateTimeStart'] + '","lt": "' + args['dateTimeEnd'] + '"} } }], ') ##Establish Query with Time Range Requirements temp.write('"should": [ ') ## Should Clause, search results must inlcude at least one of each specified option - for arg in dict.keys(): - if (("kube" in arg or "level" in arg or "mes" in arg or "log" in arg) and (dict[arg]) and arg.find('-ex')==-1): ##Looking for non-exclusion options that are not NoneType in 
args dictionary + for argname in args.keys(): + if (("kube" in argname or "level" in argname or "mes" in argname or "log" in argname) and (args[argname]) and argname.find('-ex')==-1): ##Looking for non-exclusion options that are not NoneType in args argsionary argcounter+=1 if (first == True): first = False else: temp.write(',') - if arg!='message': - for i in range(len(dict[arg])): - temp.write('{"match_phrase": { "' + arg + '":"' + dict[arg][i] + '" } }') - if(i != len(dict[arg])-1): + if argname!='message': + for i in range(len(args[argname])): + temp.write('{"match_phrase": { "' + argname + '":"' + args[argname][i] + '" } }') + if(i != len(args[argname])-1): temp.write(',') else: - temp.write('{"match_phrase": { "message":"' + ("".join(dict[arg])) + '"} }') + temp.write('{"match_phrase": { "message":"' + args[argname] + '"} }') first = True temp.write('], "minimum_should_match": ' + str(argcounter)) - for arg in dict.keys(): ##Must Not Clause, only added if specified by user - if (("kube" in arg or "level" in arg or "log" in arg) and dict[arg] and arg.find('-ex')>-1): - name = arg[0:arg.find('-')] + for argname in args.keys(): ##Must Not Clause, only added if specified by user + if (("kube" in argname or "level" in argname or "log" in argname) and args[argname] and argname.find('-ex')>-1): + name = arg[0:argname.find('-')] if (first == True): temp.write(', "must_not": [') first = False else: temp.write(',') - for i in range(len(dict[arg])): - temp.write('{"match_phrase": {"' + name + '":"' + dict[arg][i] + '"} }') - if i != len(dict[arg])-1: + for i in range(len(args[argname])): + temp.write('{"match_phrase": {"' + name + '":"' + args[argname][i] + '"} }') + if i != len(args[argname])-1: temp.write(',') if (first==False): temp.write(']') temp.write('} },') temp.write(' "fields": [') ## Add fields param - for i in range(len(dict['fields'])): - if i < len(dict['fields']) - 1: - temp.write('"' + dict['fields'][i] + '",' ) + for i in range(len(args['fields'])): + 
if i < len(args['fields']) - 1: + temp.write('"' + args['fields'][i] + '",' ) else: - temp.write('"' + dict['fields'][i] + '"],' ) + temp.write('"' + args['fields'][i] + '"],' ) temp.write('"_source": {"excludes": [] } }') temp.close() @@ -191,15 +186,15 @@ def build_query(dict): tfile.close() else: ##Open existing query - x = open(dict['query-filename'], 'rt') + x = open(args['query-filename'], 'rt') query = " ".join([line.strip() for line in x]) ## Turn file into string, return. x.close() return query -"""List of valid arguments that are read from user as soon as program is run, nargs=+ indicates that argument takes multiple whitespace separated values. """ def get_arguments(): - ###Defines the arguments a user can pass and parses them, includes help msgs + """List of valid arguments that are read from user as soon as program is run, nargs=+ indicates that argument takes multiple whitespace separated values. """ + parser = argparse.ArgumentParser(prog='getLogs.py', usage='\n%(prog)s [options]', description="""This program generates OpenSearch DSL Queries from user specified parameters, and submits them to a database to retrieve logs. The flags below provide specifications for your Query, and can be placed in any order. \n \033[1m NOTES: *All default values for username, password, host, and port, are derived from the ENV variables ESUSER, USPASSWD, ESHOST, ESPORT in that respective order. '\033[0m' \n \033[1m If you have default connections set in your environment variables, you can call this program without arguments and get the latest 10 logs from the target API in the default CSV format. 
\033[0m \n @@ -219,7 +214,7 @@ def get_arguments(): parser.add_argument('-sx', '--logsource-exclude', required=False, dest="logsource-ex",nargs='*', metavar="LOGSOURCE", help = "\nOne or more logsource for which logs should be excluded from the output\n\n") parser.add_argument('-l', '--level', required=False, dest='level', nargs='*', metavar="LEVEL", help = "\nOne or more message levels for which logs are sought\n\n") parser.add_argument('-lx', '--level-exclude', required=False, dest = 'level-exclude', nargs='*', metavar="LEVEL", help = "\nOne or more message levels for which logs should be excluded from the output.\n\n") - parser.add_argument('-se', '--search', required=False, dest= "message", nargs='*', metavar="MESSAGE", help = "\nWord or phrase contained in log message. Do not include single quotes ('')\n\n\n \t\t\t QUERY OUTPUT SETTINGS: \n\n") + parser.add_argument('-se', '--search', required=False, dest= "message", metavar="MESSAGE", help = "\nWord or phrase contained in log message. Use quotes to surround your message ('' or "")\n\n\n \t\t\t QUERY OUTPUT SETTINGS: \n\n") ##Query and Output Params parser.add_argument('-m', '--maxrows', required=False, dest ="maxInt", type=int, metavar="INTEGER", default=10, help = "\nThe maximum number of log messsages to return. Max possible rows is 10000\n\n") @@ -235,7 +230,7 @@ def get_arguments(): parser.add_argument('-i', '--index', required=False, dest="index", metavar="INDEX", default="viya_logs-*") ## help = "\nDetermine which index to perform the search in. Default: viya-logs-*\n\n ##Connection settings - parser.add_argument('-pf','--port-forward', required=False, dest="portforward", action = 'store_true', help = "\n If this option is provided, getlogs will use the value in your KUBECONFIG (case-sensitive) environment variable to port-forward and connect to the open-search API in the specified NAMESPACE. 
This skips ESHOST and ESPORT, but ESUSER and ESPASSWD are stil required to authenticate and connect to the database. \n\n") + parser.add_argument('-pf','--port-forward', required=False, dest="portforward", action = 'store_true', help = "\n If this option is provided, getlogs will use the value in your KUBECONFIG (case-sensitive) environment variable to port-forward and connect to the OpenSearch API in the specified NAMESPACE. This skips ESHOST and ESPORT, but ESUSER and ESPASSWD are stil required to authenticate and connect to the database. \n\n") parser.add_argument('-us','--user', required=False, dest="userName", default=os.environ.get("ESUSER"), help = "\nUsername for connecting to OpenSearch/Kibana (default: $ESUSER)\n\n") parser.add_argument('-pw', '--password', required=False, dest="password", default=os.environ.get("ESPASSWD"), help = "\nPassword for connecting to OpenSearch/Kibana (default: $ESPASSWD)\n\n") parser.add_argument('-ho', '--host', required=False, dest="host", default=os.environ.get("ESHOST"), help = "\nHostname for connection to OpenSearch/Kibana. Please ensure that host does not contain 'https://' (default: $ESHOST)\n\n") @@ -243,120 +238,126 @@ def get_arguments(): parser.add_argument('-nossl', '--disable-ssl', required=False, dest = "ssl", action= "store_false", help = "\n If this option is provided, SSL will not be used to connect to the database.\n\n") return parser.parse_args().__dict__ -args = get_arguments() ##Creates "args" dictionary that contains all user submitted options. Print "args" to debug values. Note that the 'dest' value for each argument in argparser object is its key. 
-validate_input(args) -if args['portforward']: - args = open_port(args) - -# Establish Client Using User Authorization and Connection Settings -auth = (args['userName'], args['password']) -client = OpenSearch( - hosts = [{'host': args['host'], 'port': args['port']}], - http_compress = True, # enables gzip compression for request bodies - http_auth = auth, - # client_cert = client_cert_path, - # client_key = client_key_path, - timeout = 90, - use_ssl = args['ssl'], - verify_certs = False, - ssl_assert_hostname = False, - ssl_show_warn = False -) - -##Build Query Using Arguments -x = build_query(args) -index_name = args['index'] - -if (args['showquery'] == True): ##Print Query if user asks. - print("The following query will be submitted:\n\n", json.dumps(json.loads(x), indent=2)) - -if(args['savequery']): ##Save Query if user asks. - safe_dir = os.getcwd() - if os.path.commonprefix((os.path.realpath(args['savequery']),safe_dir)) == safe_dir: - squery = os.path.realpath(args['savequery']) - # deepcode ignore PT: - with open(squery, "w") as outfile: - outfile.write(x) - else: - print("Error: Path traversal in save-query not allowed.") - sys.exit() - print("\nQuery saved to " + args['savequery']) - -print('\nSearching index: ') -try: - response = client.search(body=x, index=index_name) -except Exception as e: - print(e) - if ("getaddrinfo" in str(e)): - print("Connection Failed. Please verify the host and port values. ") - print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) - elif("Unauthorized" in str(e)): - print("User Authentication failed. Please verify username and password values.") - print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) - else: - print("Connection error. Please verify connection values. 
") - print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) - sys.exit() - -if response['hits']['total']['value'] == 0: - print("No results found for submitted query.") - sys.exit() - -stdout = False -if (not args['out-filename']): - stdout = True -else: - # deepcode ignore PT: - x = open(args['out-filename'], 'w') - -hitsList = [] ##Check to see if any fields matched user provided fields, collect matching fields -for hit in response['hits']['hits']: +def main(): + + args = get_arguments() ##Creates "args" dictionary that contains all user submitted options. Print "args" to debug values. Note that the 'dest' value for each argument in argparser object is its key. + validate_input(args) + if args['portforward']: + args['host'] = 'localhost' + args['port'] = open_port() + + # Establish Client Using User Authorization and Connection Settings + auth = (args['userName'], args['password']) + client = OpenSearch( + hosts = [{'host': args['host'], 'port': args['port']}], + http_compress = True, # enables gzip compression for request bodies + http_auth = auth, + # client_cert = client_cert_path, + # client_key = client_key_path, + timeout = 90, + use_ssl = args['ssl'], + verify_certs = False, + ssl_assert_hostname = False, + ssl_show_warn = False + ) + + ##Build Query Using Arguments + x = build_query(args) + index_name = args['index'] + + if (args['showquery'] == True): ##Print Query if user asks. + print("The following query will be submitted:\n\n", json.dumps(json.loads(x), indent=2)) + + if(args['savequery']): ##Save Query if user asks. 
+ safe_dir = os.getcwd() + if os.path.commonprefix((os.path.realpath(args['savequery']),safe_dir)) == safe_dir: + squery = os.path.realpath(args['savequery']) + # deepcode ignore PT: + with open(squery, "w") as outfile: + outfile.write(x) + else: + print("Error: Path traversal in save-query not allowed.") + sys.exit() + print("\nQuery saved to " + args['savequery']) + + print('\nSearching index: ') try: - hit['fields']['id'] = hit['_id'] - hitsList.append(hit['fields']) - except KeyError as e: - next - -for fieldDict in hitsList: - for field in args['fields']: - if not field in fieldDict.keys(): ##replaces empty fields with NULL values - fieldDict[field] = "NULL" - elif type(fieldDict[field]) == list: ##Converts lists into strings for proper output - fieldDict[field] = ''.join(fieldDict[field]) - -if (len(hitsList) == 0): - print("Error: No fields matched provided fieldnames. Please verify the field on opensearch-dashboards.\n") - sys.exit() - -##Output as proper filetype -if("json" in args['format']): ##JSON formatter, uses json.dump to print to stdout or file - if (not stdout): - with x as outfile: - # deepcode ignore PT: - json.dump(json.loads(hitsList), outfile, sort_keys=True, indent=2) - print("Search complete. Results printed to " + args['out-filename']) - else: - print("Search complete.") - sys.stdout.write(json.dumps(hitsList, sort_keys=True, indent=2)) - -elif("csv" in args['format']): ##CSV writer implemented using dictwriter - args['fields'].append("id") - - if (not stdout): - with x as csvfile: - header = args['fields'] - writer = csv.DictWriter(csvfile, fieldnames = header) - writer.writeheader() - for fieldDict in hitsList: - writer.writerow(fieldDict) - print("Search complete. Results printed to " + args['out-filename']) + response = client.search(body=x, index=index_name) + except Exception as e: + print(e) + if ("getaddrinfo" in str(e)): + print("Connection Failed. Please verify the host and port values. 
") + print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) + elif("Unauthorized" in str(e)): + print("User Authentication failed. Please verify username and password values.") + print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) + else: + print("Connection error. Please verify connection values. ") + print("Username:", args['userName'], " Password:", args['password'], " Host:", args['host'], " Port:", args['port']) + sys.exit() + + if response['hits']['total']['value'] == 0: + print("No results found for submitted query.") + sys.exit() + + stdout = False + if (not args['out-filename']): + stdout = True else: - print("Search complete") - with sys.stdout as csvfile: - header = args['fields'] - writer = csv.DictWriter(csvfile, fieldnames = header) - writer.writeheader() - for fieldDict in hitsList: - writer.writerow(fieldDict) - print("\n") + # deepcode ignore PT: + x = open(args['out-filename'], 'w') + + hitsList = [] ##Check to see if any fields matched user provided fields, collect matching fields + for hit in response['hits']['hits']: + try: + hit['fields']['id'] = hit['_id'] + hitsList.append(hit['fields']) + except KeyError as e: + next + + for fieldDict in hitsList: + for field in args['fields']: + if not field in fieldDict.keys(): ##replaces empty fields with NULL values + fieldDict[field] = "NULL" + elif type(fieldDict[field]) == list: ##Converts lists into strings for proper output + fieldDict[field] = ''.join(fieldDict[field]) + + if (len(hitsList) == 0): + print("Error: No fields matched provided fieldnames. 
Please verify the field on OpenSearch-dashboards.\n") + sys.exit() + + ##Output as proper filetype + if("json" in args['format']): ##JSON formatter, uses json.dump to print to stdout or file + if (not stdout): + with x as outfile: + # deepcode ignore PT: + json.dump(json.loads(hitsList), outfile, sort_keys=True, indent=2) + print("Search complete. Results printed to " + args['out-filename']) + else: + print("Search complete.") + sys.stdout.write(json.dumps(hitsList, sort_keys=True, indent=2)) + + elif("csv" in args['format']): ##CSV writer implemented using dictwriter + args['fields'].append("id") + + if (not stdout): + with x as csvfile: + header = args['fields'] + writer = csv.DictWriter(csvfile, fieldnames = header) + writer.writeheader() + for fieldDict in hitsList: + writer.writerow(fieldDict) + print("Search complete. Results printed to " + args['out-filename']) + else: + print("Search complete") + with sys.stdout as csvfile: + header = args['fields'] + writer = csv.DictWriter(csvfile, fieldnames = header) + writer.writeheader() + for fieldDict in hitsList: + writer.writerow(fieldDict) + print("\n") + +if __name__ == "__main__": + main() From 40f423522f2701135f4cfac4fade547ddade4019 Mon Sep 17 00:00:00 2001 From: vishaanS <127869399+vishaanS@users.noreply.github.com> Date: Wed, 3 Jan 2024 09:09:27 -0500 Subject: [PATCH 2/4] Getlogs Production Changes --- logging/bin/getlogs.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/logging/bin/getlogs.py b/logging/bin/getlogs.py index 3c17cb2e..97f97272 100644 --- a/logging/bin/getlogs.py +++ b/logging/bin/getlogs.py @@ -51,11 +51,11 @@ def validate_input(checkInput): safe_dir = os.getcwd() ## Check for path traversal attack if os.path.commonprefix((os.path.realpath(checkInput['out-filename']),safe_dir)) != safe_dir: - print("Error: Path traversal in out-filename not allowed.") + print("Error: Out-file path must be in same working directory as getlogs.") sys.exit() try: - x = 
open(args['out-filename'], 'w') + x = open(checkInput['out-filename'], 'w') x.close() except FileNotFoundError as e: print("Error: Output file path not found. Please verify output file path. ") @@ -73,7 +73,7 @@ def validate_input(checkInput): if checkInput['query-filename']: ##Use for plugging in queries safe_dir = os.getcwd() ## Check for path traversal attack if os.path.commonprefix((os.path.realpath(checkInput['query-filename']),safe_dir)) != safe_dir: - print("Error: Path traversal in query-filename not allowed.") + print("Error: Query file must be from the same working directory as getlogs.") sys.exit() if (not os.path.isfile(checkInput['query-filename'])): @@ -158,7 +158,7 @@ def build_query(args): temp.write('], "minimum_should_match": ' + str(argcounter)) for argname in args.keys(): ##Must Not Clause, only added if specified by user if (("kube" in argname or "level" in argname or "log" in argname) and args[argname] and argname.find('-ex')>-1): - name = arg[0:argname.find('-')] + name = argname[0:argname.find('-')] if (first == True): temp.write(', "must_not": [') first = False @@ -222,7 +222,7 @@ def get_arguments(): parser.add_argument('-sh', '--show-query', required=False, dest="showquery", action= "store_true", help = "\n Displays the actual query that will be submitted during execution.\n\n") parser.add_argument('-sq', '--save-query', required=False, dest="savequery", nargs='*', metavar="FILENAME", help = "\n Specify a file name (without filetype) in which to save the generated query. Query is saved as JSON file in current working directory.\n\n") parser.add_argument('-o', '--out-file', required=False, dest="out-filename", nargs='*', metavar="FILENAME", help = "\nName of file to write results to. Filetype is specified using -format. Supported filetypes: .csv, .json\n\n") - parser.add_argument('-fo','--format', required=False, dest="format", default = "csv", help = "\n Formats results into the specified file (from --out-file). 
If no output file is provided, results will be outputted to STDOUT. Supported formats for console output are json and csv. \n\n") + parser.add_argument('-fo','--format', required=False, dest="format", default = "csv", choices = ['json', 'csv'], help = "\n Formats results into the specified file (from --out-file). If no output file is provided, results will be outputted to STDOUT. Supported formats for console output are json and csv. \n\n") parser.add_argument('-f','--force', required=False, dest="force", action= "store_true", help = "\n If this option is provided, the output results file from --out-file will be overwritten if it already exists.\n\n") parser.add_argument('-fi','--fields', required=False, dest="fields", nargs="*", metavar= "FIELDS", default=['@timestamp', 'level', 'kube.pod', 'message'], help = "\n Specify desired output columns from query. If a matching log is returned that does not have the specified field, a NULL value will be used as a placeholder. \n Default fields: @timestamp level kube.pod message \n Additional arguments: kube.host, properties.appname \n\n") parser.add_argument('-st', '--start', required=False, dest="dateTimeStart", nargs='*', metavar="DATETIME", default = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(time.localtime()) - 3600)), help = "\nDatetime for start of period for which logs are sought (default: 1 hour ago). Correct format is Y-M-D H:M:S. Ex: 2023-02-16 10:00:00\n\n") @@ -241,8 +241,9 @@ def get_arguments(): def main(): args = get_arguments() ##Creates "args" dictionary that contains all user submitted options. Print "args" to debug values. Note that the 'dest' value for each argument in argparser object is its key. 
- validate_input(args) - if args['portforward']: + validate_input(args) ##Pass args dictionary for input validation + + if args['portforward']: ##Modify connection settings if port forward is selected args['host'] = 'localhost' args['port'] = open_port() @@ -276,7 +277,7 @@ def main(): with open(squery, "w") as outfile: outfile.write(x) else: - print("Error: Path traversal in save-query not allowed.") + print("Error: Saved query must be written to current working directory.") sys.exit() print("\nQuery saved to " + args['savequery']) @@ -307,7 +308,7 @@ def main(): # deepcode ignore PT: x = open(args['out-filename'], 'w') - hitsList = [] ##Check to see if any fields matched user provided fields, collect matching fields + hitsList = [] ##Check to see if any fields in response matched user provided fields, collect matching fields for hit in response['hits']['hits']: try: hit['fields']['id'] = hit['_id'] @@ -326,7 +327,7 @@ def main(): print("Error: No fields matched provided fieldnames. Please verify the field on OpenSearch-dashboards.\n") sys.exit() - ##Output as proper filetype + ##Output as proper filetype, JSON or CSV if("json" in args['format']): ##JSON formatter, uses json.dump to print to stdout or file if (not stdout): with x as outfile: From 7500f614b6feca32185bd0ec5835553cef71f91d Mon Sep 17 00:00:00 2001 From: vishaanS <127869399+vishaanS@users.noreply.github.com> Date: Wed, 3 Jan 2024 09:17:36 -0500 Subject: [PATCH 3/4] Final Production Changes --- logging/bin/getlogs.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/logging/bin/getlogs.py b/logging/bin/getlogs.py index 97f97272..5acf6ad7 100644 --- a/logging/bin/getlogs.py +++ b/logging/bin/getlogs.py @@ -196,7 +196,7 @@ def get_arguments(): """List of valid arguments that are read from user as soon as program is run, nargs=+ indicates that argument takes multiple whitespace separated values. 
""" parser = argparse.ArgumentParser(prog='getLogs.py', usage='\n%(prog)s [options]', description="""This program generates OpenSearch DSL Queries from user specified parameters, and submits them to a database to retrieve logs. The flags below provide specifications for your Query, and can be placed in any order. \n - \033[1m NOTES: *All default values for username, password, host, and port, are derived from the ENV variables ESUSER, USPASSWD, ESHOST, ESPORT in that respective order. '\033[0m' \n + \033[1m NOTES: *All default values for username, password, host, and port, are derived from the ENV variables ESUSER, ESPASSWD, ESHOST, ESPORT in that respective order. '\033[0m' \n \033[1m If you have default connections set in your environment variables, you can call this program without arguments and get the latest 10 logs from the target API in the default CSV format. \033[0m \n Getlogs has a default set of fields that runs with every query (seen below). You can replace the default fields with your own space-separated set of fields using --fields. Ex: --fields kube.labels.sas_com/deployment properties.appname \n *The NAMESPACE*, POD*, CONTAINER*, LOGSOURCE* and LEVEL* options accept multiple, space-separated, values (e.g. --level INFO NONE). Please refrain from passing single quotes ('') into arguments. \n @@ -224,17 +224,17 @@ def get_arguments(): parser.add_argument('-o', '--out-file', required=False, dest="out-filename", nargs='*', metavar="FILENAME", help = "\nName of file to write results to. Filetype is specified using -format. Supported filetypes: .csv, .json\n\n") parser.add_argument('-fo','--format', required=False, dest="format", default = "csv", choices = ['json', 'csv'], help = "\n Formats results into the specified file (from --out-file). If no output file is provided, results will be outputted to STDOUT. Supported formats for console output are json and csv. 
\n\n") parser.add_argument('-f','--force', required=False, dest="force", action= "store_true", help = "\n If this option is provided, the output results file from --out-file will be overwritten if it already exists.\n\n") - parser.add_argument('-fi','--fields', required=False, dest="fields", nargs="*", metavar= "FIELDS", default=['@timestamp', 'level', 'kube.pod', 'message'], help = "\n Specify desired output columns from query. If a matching log is returned that does not have the specified field, a NULL value will be used as a placeholder. \n Default fields: @timestamp level kube.pod message \n Additional arguments: kube.host, properties.appname \n\n") + parser.add_argument('-fi','--fields', required=False, dest="fields", nargs="*", metavar= "FIELDS", default=['@timestamp', 'level', 'kube.pod', 'message'], help = "\n Specify desired output columns from query. If a matching log is returned that does not have the specified field, a NULL value will be used as a placeholder. ID is a default field for every log, so it does not need to be specified as a field. \n Default fields: @timestamp level kube.pod message ID\n\n") parser.add_argument('-st', '--start', required=False, dest="dateTimeStart", nargs='*', metavar="DATETIME", default = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(time.localtime()) - 3600)), help = "\nDatetime for start of period for which logs are sought (default: 1 hour ago). Correct format is Y-M-D H:M:S. Ex: 2023-02-16 10:00:00\n\n") parser.add_argument('-en', '--end', required=False, dest="dateTimeEnd",nargs='*', metavar="DATETIME", default = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), help = "\nDatetime for end of period for which logs are sought (default: now). \n\n\n \t\t\t CONNECTION SETTINGS: \n\n") parser.add_argument('-i', '--index', required=False, dest="index", metavar="INDEX", default="viya_logs-*") ## help = "\nDetermine which index to perform the search in. 
Default: viya-logs-*\n\n
 
     ##Connection settings
-    parser.add_argument('-pf','--port-forward', required=False, dest="portforward", action = 'store_true', help = "\n If this option is provided, getlogs will use the value in your KUBECONFIG (case-sensitive) environment variable to port-forward and connect to the OpenSearch API in the specified NAMESPACE. This skips ESHOST and ESPORT, but ESUSER and ESPASSWD are stil required to authenticate and connect to the database. \n\n")
-    parser.add_argument('-us','--user', required=False, dest="userName", default=os.environ.get("ESUSER"), help = "\nUsername for connecting to OpenSearch/Kibana (default: $ESUSER)\n\n")
-    parser.add_argument('-pw', '--password', required=False, dest="password", default=os.environ.get("ESPASSWD"), help = "\nPassword for connecting to OpenSearch/Kibana (default: $ESPASSWD)\n\n")
-    parser.add_argument('-ho', '--host', required=False, dest="host", default=os.environ.get("ESHOST"), help = "\nHostname for connection to OpenSearch/Kibana. Please ensure that host does not contain 'https://' (default: $ESHOST)\n\n")
-    parser.add_argument('-po', '--port', required=False, dest="port", default=os.environ.get("ESPORT"), help = "\nPort number for connection to OpenSearch/Kibana (default: $ESPORT)\n\n")
+    parser.add_argument('-pf','--port-forward', required=False, dest="portforward", action = 'store_true', help = "\n If this option is provided, getlogs will use the value in your KUBECONFIG (case-sensitive) environment variable to port-forward and connect to the OpenSearch API. This skips ESHOST and ESPORT, but ESUSER and ESPASSWD are still required to authenticate and connect to the database. 
\n\n")
+    parser.add_argument('-us','--user', required=False, dest="userName", default=os.environ.get("ESUSER"), help = "\nUsername for connecting to OpenSearch (default: $ESUSER)\n\n")
+    parser.add_argument('-pw', '--password', required=False, dest="password", default=os.environ.get("ESPASSWD"), help = "\nPassword for connecting to OpenSearch (default: $ESPASSWD)\n\n")
+    parser.add_argument('-ho', '--host', required=False, dest="host", default=os.environ.get("ESHOST"), help = "\nHostname for connection to OpenSearch. Please ensure that host does not contain 'https://' (default: $ESHOST)\n\n")
+    parser.add_argument('-po', '--port', required=False, dest="port", default=os.environ.get("ESPORT"), help = "\nPort number for connection to OpenSearch (default: $ESPORT)\n\n")
     parser.add_argument('-nossl', '--disable-ssl', required=False, dest = "ssl", action= "store_false", help = "\n If this option is provided, SSL will not be used to connect to the database.\n\n")
 
     return parser.parse_args().__dict__

From d6c4ecee53ef43ade808bb20c5aab7d1dd25f707 Mon Sep 17 00:00:00 2001
From: vishaanS <127869399+vishaanS@users.noreply.github.com>
Date: Thu, 4 Jan 2024 13:39:17 -0500
Subject: [PATCH 4/4] Getlogs final production update

---
 logging/bin/getlogs.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/logging/bin/getlogs.py b/logging/bin/getlogs.py
index 5acf6ad7..8ff205bb 100644
--- a/logging/bin/getlogs.py
+++ b/logging/bin/getlogs.py
@@ -42,8 +42,6 @@ def validate_input(checkInput):
         if(type(checkInput['out-filename']) == list):
             checkInput['out-filename']= " ".join(checkInput['out-filename'])
 
-        checkInput['out-filename'] = checkInput['out-filename'] + "." + checkInput['format'] ## Add format to file
-
         if os.path.isfile(checkInput['out-filename']): ##Check if file already exists
             if (checkInput['force'] == False):
                 print("\nUser specified output file already exists. 
Use -f to overwrite the file.\n") @@ -51,7 +49,7 @@ def validate_input(checkInput): safe_dir = os.getcwd() ## Check for path traversal attack if os.path.commonprefix((os.path.realpath(checkInput['out-filename']),safe_dir)) != safe_dir: - print("Error: Out-file path must be in same working directory as getlogs.") + print("Error: Out-file path must be in the current working directory.") sys.exit() try: @@ -73,7 +71,7 @@ def validate_input(checkInput): if checkInput['query-filename']: ##Use for plugging in queries safe_dir = os.getcwd() ## Check for path traversal attack if os.path.commonprefix((os.path.realpath(checkInput['query-filename']),safe_dir)) != safe_dir: - print("Error: Query file must be from the same working directory as getlogs.") + print("Error: Query file must be from the current working directory.") sys.exit() if (not os.path.isfile(checkInput['query-filename'])): @@ -221,8 +219,8 @@ def get_arguments(): parser.add_argument('-q', '--query-file ', required=False, dest="query-filename", metavar="FILENAME.*", help = "\n Filepath of existing saved query in current working directory. Program will submit query from file, ALL other query parmeters ignored. Supported filetypes: .txt, .json\n\n") parser.add_argument('-sh', '--show-query', required=False, dest="showquery", action= "store_true", help = "\n Displays the actual query that will be submitted during execution.\n\n") parser.add_argument('-sq', '--save-query', required=False, dest="savequery", nargs='*', metavar="FILENAME", help = "\n Specify a file name (without filetype) in which to save the generated query. Query is saved as JSON file in current working directory.\n\n") - parser.add_argument('-o', '--out-file', required=False, dest="out-filename", nargs='*', metavar="FILENAME", help = "\nName of file to write results to. Filetype is specified using -format. 
Supported filetypes: .csv, .json\n\n") - parser.add_argument('-fo','--format', required=False, dest="format", default = "csv", choices = ['json', 'csv'], help = "\n Formats results into the specified file (from --out-file). If no output file is provided, results will be outputted to STDOUT. Supported formats for console output are json and csv. \n\n") + parser.add_argument('-o', '--out-file', required=False, dest="out-filename", nargs='*', metavar="FILENAME", help = "\nName of file to write results to. If no output file is provided, results will be outputted to STDOUT. \n\n") + parser.add_argument('-fo','--format', required=False, dest="format", default = "csv", choices = ['json', 'csv'], help = "\n Determines the output format for the returned log messages. Supported formats for output are json and csv. \n\n") parser.add_argument('-f','--force', required=False, dest="force", action= "store_true", help = "\n If this option is provided, the output results file from --out-file will be overwritten if it already exists.\n\n") parser.add_argument('-fi','--fields', required=False, dest="fields", nargs="*", metavar= "FIELDS", default=['@timestamp', 'level', 'kube.pod', 'message'], help = "\n Specify desired output columns from query. If a matching log is returned that does not have the specified field, a NULL value will be used as a placeholder. ID is a default field for every log, so it does not need to be specified as a field. \n Default fields: @timestamp level kube.pod message ID\n\n") parser.add_argument('-st', '--start', required=False, dest="dateTimeStart", nargs='*', metavar="DATETIME", default = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.mktime(time.localtime()) - 3600)), help = "\nDatetime for start of period for which logs are sought (default: 1 hour ago). Correct format is Y-M-D H:M:S. 
Ex: 2023-02-16 10:00:00\n\n") @@ -311,7 +309,7 @@ def main(): hitsList = [] ##Check to see if any fields in response matched user provided fields, collect matching fields for hit in response['hits']['hits']: try: - hit['fields']['id'] = hit['_id'] + hit['fields']['ID'] = hit['_id'] hitsList.append(hit['fields']) except KeyError as e: next @@ -332,14 +330,14 @@ def main(): if (not stdout): with x as outfile: # deepcode ignore PT: - json.dump(json.loads(hitsList), outfile, sort_keys=True, indent=2) + json.dump(hitsList, outfile, sort_keys=True, indent=2) print("Search complete. Results printed to " + args['out-filename']) else: print("Search complete.") sys.stdout.write(json.dumps(hitsList, sort_keys=True, indent=2)) elif("csv" in args['format']): ##CSV writer implemented using dictwriter - args['fields'].append("id") + args['fields'].append("ID") if (not stdout): with x as csvfile: