This repository has been archived by the owner on Mar 1, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Get table completion percentage #46
Closed
Closed
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
205264c
set context"
elsiehoffet-94 7df64d1
add column completion to database explorer
elsiehoffet-94 2c7d12f
add user tests
elsiehoffet-94 b64fbee
move user file
elsiehoffet-94 49bdbd8
Merge branch 'master' into eh/compute_db_completion_percentage
elsiehoffet-94 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from os import getenv | ||
|
||
from pagai.services.database_explorer import POSTGRES, ORACLE, DatabaseExplorer | ||
from pagai.errors import OperationOutcome | ||
from sqlalchemy.exc import OperationalError | ||
import json | ||
|
||
def get_col_completion(owner, table_name, sorted, db_model):
    """
    Return the completion percentage of every column of a table as CSV-friendly text.

    Connection settings are read from the DB_HOST, DB_PORT (default 1531),
    DB_NAME, DB_USER and DB_PASSWORD environment variables.

    Args:
        owner: Schema owner forwarded to ``DatabaseExplorer.get_db_schema``.
        table_name: Name of the table whose columns are inspected.
        sorted: Forwarded as ``sort`` to ``get_column_completion``. The name
            shadows the builtin but is kept so keyword callers keep working.
        db_model: Key selecting the database driver: "POSTGRES" or "ORACLE".

    Returns:
        A string with one "<column>, <percentage>" line per column.

    Raises:
        OperationOutcome: If ``db_model`` is not supported, if the database
            cannot be reached, or if any other error occurs while computing
            the completion.
    """
    # Switch on the possible db models;
    # if the db model is not supported, an error is raised.
    db_drivers = {"POSTGRES": POSTGRES, "ORACLE": ORACLE}
    if db_model not in db_drivers:
        # Report the value that was actually rejected.
        raise OperationOutcome(f"Database type {db_model} is unknown")
    driver = db_drivers[db_model]

    credentials = {
        'model': 'public',
        'host': getenv('DB_HOST'),
        'port': int(getenv('DB_PORT', 1531)),
        'database': getenv('DB_NAME'),
        'login': getenv('DB_USER'),
        'password': getenv('DB_PASSWORD'),
    }

    try:
        explorer = DatabaseExplorer(driver, credentials)
        # Use the caller-supplied owner and the selected driver (the previous
        # code referenced an undefined ``db_config`` and ignored ``owner``).
        schema = explorer.get_db_schema(owner=owner, driver=driver)
        col_completion = explorer.get_column_completion(
            db_schema=schema, table=table_name, sort=sorted
        )
    except OperationOutcome:
        # Already in the format callers expect; do not wrap it a second time.
        raise
    except OperationalError as e:
        if "could not connect to server" in str(e):
            raise OperationOutcome(f"Could not connect to the database: {e}") from e
        raise OperationOutcome(e) from e
    except Exception as e:
        raise OperationOutcome(e) from e

    # Return CSV friendly formatting
    return "".join(f"{item[0]}, {item[1]} \n" for item in col_completion)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,3 +134,20 @@ def get_db_schema(self, owner: str, driver=POSTGRES): | |
for row in result: | ||
db_schema[row["table_name"].lower()].append(row["column_name"].lower()) | ||
return db_schema | ||
|
||
|
||
def get_column_completion(self, db_schema: defaultdict, table: str, sort: bool):
    """
    Return a list of (column_name, percentage of completion) for all columns
    of a given table.

    The percentage is the share of non-NULL values in the column, rounded to
    zero decimals.

    Args:
        db_schema: Mapping of table name to its list of column names.
        table: Name of the table to inspect; looked up in ``db_schema``.
        sort: When true, columns are reported in alphabetical order,
            otherwise in the order stored in ``db_schema``.

    Returns:
        A list of ``(column_name, percentage)`` tuples, in the same order as
        the columns were queried. The list is empty when the table has no
        known column. Percentages are NULL/None when the table has no rows.
    """
    column_list = sorted(db_schema[table]) if sort else list(db_schema[table])
    if not column_list:
        # Nothing to select: avoid building an invalid "select  from ..." query.
        return []

    # Multiply by 100.0 before dividing so the division is not truncated to
    # an integer, and use nullif so an empty table does not divide by zero.
    # NOTE(review): a reviewer asked whether this could be expressed with
    # SQLAlchemy constructs instead of string formatting.
    # NOTE(review): identifiers are interpolated unquoted; they are assumed
    # to come from the database schema, not from untrusted input - confirm.
    select_items = ", ".join(
        f"round(count({column}) * 100.0 / nullif(count(*), 0), 0)"
        for column in column_list
    )
    sql_query = f"select {select_items} from {table}"

    with self._sql_engine.connect() as connection:
        query_result = connection.execute(sql_query).fetchall()

    # Pair results with the list used to build the query so names and values
    # stay aligned when sorting is requested.
    return list(zip(column_list, query_result[0]))
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where will this be called? Do you think we should make it a route (as for get_db_schema)?