Skip to content

Commit

Permalink
Merge pull request #51 from htrc/merge-develop
Browse files Browse the repository at this point in the history
Added new authentication methods
  • Loading branch information
samithaliyanage authored Jan 19, 2022
2 parents 5225dc7 + c3d311d commit a5ac3ad
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 71 deletions.
39 changes: 25 additions & 14 deletions htrc/auth.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,43 @@
import time
#from base64 import b64encode
from getpass import getpass
#import http.client
#import ssl
#import time
import subprocess

import requests
import requests.auth
#import configparser

import htrc.config


def get_jwt_token():
# Currently we just store one common JWT token locally in the .htrc file for simplicity.
# We expect to add a POST method to query a unique JWT token using the combination of username and password.
username, password = credential_prompt()
#username, password = credential_prompt()

client_id, client_secret = htrc.config.get_credentials()
#client_id, client_secret = htrc.config.get_credentials()

auth = requests.auth.HTTPBasicAuth(client_id, client_secret)
data = {"grant_type": "password",
"username": username,
"password": password,
"scope": "openid"}
#auth = requests.auth.HTTPBasicAuth(client_id, client_secret)
#data = { "grant_type": "password",
#"username": username,
#"password": password,
#"scope" : "openid"}

url = htrc.config.get_idp_url()
r = requests.post(url, data=data, auth=auth)
url1 = htrc.config.get_idp_url()
capsule_id = htrc.config._get_value("jwt", "capsule_id")
result = subprocess.check_output("hostname -s -I | awk '{print $1}'", shell=True)
result = result.decode('utf-8')
result = result[:-1]
capsule_ip = result.strip()
url = url1 + "/" + capsule_id + "/" + capsule_ip
r = requests.get(url)

data = r.json()
if 'error' not in data:
expiration = int(time.time()) + data['expires_in']
return data['id_token'], expiration
#expiration = int(time.time()) + data['expires_in']
return data['token']
elif data['error'] == 'invalid_grant':
print("Invalid username or password. Please try again.\n")
return get_jwt_token()
Expand All @@ -50,5 +61,5 @@ def credential_prompt():


if __name__ == '__main__':
token, expiration = get_jwt_token()
htrc.config.save_jwt_token(token, expiration)
token = get_jwt_token()
htrc.config.save_jwt_token(token)
32 changes: 10 additions & 22 deletions htrc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self,
key: Optional[str] = None) -> None:
super().__init__()

self.token = token or get_jwt_token(save_new_token=False)
self.token = token or get_jwt_token()
self.host = host or get_dataapi_host()
self.port = port or get_dataapi_port()
self.epr = epr or get_dataapi_epr()
Expand Down Expand Up @@ -103,27 +103,15 @@ def get_idp_url(path=None):


# Add jwt credential access methods
def get_jwt_token(path=None, save_new_token=True):
try:
token = _get_value('jwt', 'token', path)

# check expiration date
expiration = int(_get_value('jwt', 'expiration', path))
if time.time() > expiration:
import htrc
htrc.config.remove_jwt_token()
raise RuntimeError("JWT token expired.")
except:
# This should run on either a missing or expired token.
import htrc.auth
token, expiration = htrc.auth.get_jwt_token()
if save_new_token:
htrc.config.save_jwt_token(token, expiration, path)
def get_jwt_token(path=None):

import htrc.auth
token = htrc.auth.get_jwt_token()

return token

def save_jwt_token(token, path=None):

def save_jwt_token(token, expiration=None, path=None):
"""
Saves JWT token in the config file.
"""
Expand All @@ -132,8 +120,8 @@ def save_jwt_token(token, expiration=None, path=None):
path = DEFAULT_PATH

# Default to expiration of now - force a new token on next request
if expiration is None:
expiration = time.time()
#if expiration is None:
#expiration = time.time()

# Open and modify existing config file, if it exists.
config = ConfigParser(allow_no_value=True)
Expand All @@ -144,7 +132,7 @@ def save_jwt_token(token, expiration=None, path=None):

# set token and expiration
config.set('jwt', 'token', token)
config.set('jwt', 'expiration', expiration)
#config.set('jwt', 'expiration', expiration)

with open(path, 'w') as credential_file:
config.write(credential_file)
Expand All @@ -168,7 +156,7 @@ def remove_jwt_token(path=None):
config.add_section('jwt')
# set token and expiration
config.set('jwt', 'token', " ")
config.set('jwt', 'expiration', " ")
#config.set('jwt', 'expiration', " ")

with open(path, 'w') as credential_file:
config.write(credential_file)
Expand Down
88 changes: 54 additions & 34 deletions htrc/volumes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,32 @@

standard_library.install_aliases()


#from builtins import input
from htrc.models import HtrcPage

import http.client
from io import BytesIO, TextIOWrapper
import json
import os.path
import progressbar

#import re
import socket
import ssl
#import sys
#from time import sleep
#from urllib.request import urlopen
#from urllib.error import HTTPError
from urllib.parse import urlencode
#import xml.etree.ElementTree as ET
from urllib.parse import urlencode
from zipfile import ZipFile # used to decompress requested zip archives.
from tqdm import tqdm
from htrc.runningheaders import parse_page_structure
from functools import partial
import pandas as pd
#from htrc.lib.cli import bool_prompt
from htrc.util import split_items
import htrc.config
import multiprocessing
Expand Down Expand Up @@ -62,7 +73,8 @@ def get_volumes(data_api_config: htrc.config.HtrcDataApiConfig, volume_ids, conc

data = {'volumeIDs': '|'.join(
[id.replace('+', ':').replace('=', '/') for id in volume_ids])}



if concat:
data['concat'] = 'true'

Expand All @@ -77,7 +89,7 @@ def get_volumes(data_api_config: htrc.config.HtrcDataApiConfig, volume_ids, conc
# TODO: Fix SSL cert verification
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
#ctx.verify_mode = ssl.CERT_NONE

# Retrieve the volumes
httpsConnection = http.client.HTTPSConnection(
Expand Down Expand Up @@ -157,7 +169,7 @@ def get_pages(data_api_config: htrc.config.HtrcDataApiConfig, page_ids, concat=F
# TODO: Fix SSL cert verification
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
#ctx.verify_mode = ssl.CERT_NONE

# Retrieve the volumes
httpsConnection = http.client.HTTPSConnection(
Expand Down Expand Up @@ -200,49 +212,52 @@ def get_pages(data_api_config: htrc.config.HtrcDataApiConfig, page_ids, concat=F
return data


def get_oauth2_token(username, password):
#def get_oauth2_token(username, password):
# make sure to set the request content-type as application/x-www-form-urlencoded
headers = {"Content-type": "application/x-www-form-urlencoded"}
data = {"grant_type": "client_credentials",
"client_secret": password,
"client_id": username}
data = urlencode(data)
#headers = {"Content-type": "application/x-www-form-urlencoded"}
#data = { "grant_type": "client_credentials",
#"client_secret": password,
#"client_id": username }
#data = urlencode(data)

# create an SSL context
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
#ctx = ssl.create_default_context()
#ctx.check_hostname = False
#ctx.verify_mode = ssl.CERT_NONE

# make sure the request method is POST
host, port = htrc.config.get_oauth2_host_port()
oauth2port = htrc.config.get_oauth2_port()
oauth2EPRurl = htrc.config.get_oauth2_url()
httpsConnection = http.client.HTTPSConnection(host, oauth2port, context=ctx)
httpsConnection.request("POST", oauth2EPRurl + "?" + data, "", headers)
#host, port = htrc.config.get_oauth2_host_port()
#oauth2port = htrc.config.get_oauth2_port()
#oauth2EPRurl = htrc.config.get_oauth2_url()
#httpsConnection = http.client.HTTPSConnection(host, oauth2port, context=ctx)
#httpsConnection.request("POST", oauth2EPRurl + "?" + data, "", headers)

response = httpsConnection.getresponse()
#response = httpsConnection.getresponse()

# if response status is OK
if response.status == 200:
data = response.read().decode('utf8')
#if response.status == 200:
#data = response.read().decode('utf8')

jsonData = json.loads(data)
logging.info("*** JSON: {}".format(jsonData))
#jsonData = json.loads(data)
#logging.info("*** JSON: {}".format(jsonData))

token = jsonData["access_token"]
logging.info("*** parsed token: {}".format(token))
#token = jsonData["access_token"]
#logging.info("*** parsed token: {}".format(token))

else:
logging.debug("Unable to get token")
logging.debug("Response Code: {}".format(response.status))
logging.debug("Response: {}".format(response.reason))
logging.debug(response.read())
raise EnvironmentError("Unable to get the token.")

if httpsConnection is not None:
httpsConnection.close()
#else:
#logging.debug("Unable to get token")
#logging.debug("Response Code: {}".format(response.status))
#logging.debug("Response: {}".format(response.reason))
#logging.debug(response.read())
#raise EnvironmentError("Unable to get token.")

#if httpsConnection is not None:
#httpsConnection.close()


#return token

return token


def grep_error(file_name, output_dir, pattern, txt_index):
Expand All @@ -260,6 +275,9 @@ def grep_error(file_name, output_dir, pattern, txt_index):
return na_volume


return na_volume


def _to_htrc_page(page_file, zip):
with TextIOWrapper(BytesIO(zip.read(page_file)), encoding='utf-8') as page:
return HtrcPage([line.rstrip() for line in page.readlines()])
Expand Down Expand Up @@ -416,7 +434,9 @@ def _remove_headers_footers_and_save(vol_data, concat, hf_min_similarity, hf_win
for vol_page_path, page_body in zip(sorted_vol_zip_page_paths, pages_body):
with open(os.path.join(output_dir, vol_page_path), 'w', encoding='utf-8') as page_file:
page_file.write(page_body)




removed_hf = []
for vol_page_path, vol_page in zip(sorted_vol_zip_page_paths, vol_pages):
if not (vol_page.has_header or vol_page.has_footer):
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import atexit
import tarfile

__version__ = '0.1.57b0'

__version__ = '0.1.57'


install_requires = ['PyLD', 'future', 'prov', 'unicodecsv', 'progressbar2', 'pandas',
'requests', 'argparse==1.1', 'topicexplorer==1.0b226', 'numpy==1.16.2', 'tqdm==4.46.0']
Expand Down

0 comments on commit a5ac3ad

Please sign in to comment.