|
3 | 3 |
|
4 | 4 | The main functions and their objectives are:
|
5 | 5 | 1. get_summary_api_function: Function used to summarize a bill - It takes in bill id, bill title and bill text
|
6 |
| - and returns summary of the bill. |
| 6 | + and returns summary of the bill. |
| 7 | +
|
7 | 8 | 2. get_tags_api_function: Function used to tag a bill with pre-specified tags - It takes in bill id, bill title
|
8 |
| - and bill text and returns the selected tags from specified tags. |
| 9 | + and bill text and returns the selected tags from specified tags. |
| 10 | + |
| 11 | +3. get_summaries_and_tags_api_function: Combined function that generates both summary and tags in a single call - |
| 12 | + It takes in bill id, bill title and bill text, first generates a summary, and then |
| 13 | + uses this summary to generate relevant tags. This approach ensures tags are based on |
| 14 | + the distilled information in the summary rather than the full bill text. |
| 15 | +
|
| 16 | +4. get_tags_api_function_v2: Optimized version of tag generation that works with bill summaries - It takes in |
| 17 | + bill id, bill title and bill summary (instead of full text) to generate tags. This |
| 18 | + version provides more focused tagging by working with already-distilled information. |
| 19 | +
|
| 20 | +Note: |
| 21 | + - All functions return standardized response objects with status codes indicating success or specific failure modes |
| 22 | + - The v2 functions represent an improved approach that uses bill summaries for more efficient and accurate tagging |
| 23 | + - Templates for prompts are maintained separately to ensure consistency across different parts of the application |
9 | 24 |
|
10 | 25 | """
|
11 | 26 | import json
|
@@ -278,6 +293,7 @@ class BillDetails():
|
278 | 293 | committee_info: str = ''
|
279 | 294 | mgl_names: str = ''
|
280 | 295 | invoke_dict: dict = field(default_factory=list)
|
| 296 | + summary: str = '' |
281 | 297 |
|
282 | 298 | @dataclass()
|
283 | 299 | class LLMResults:
|
@@ -309,6 +325,63 @@ def extract_bill_context(bill_text: str) -> tuple:
|
309 | 325 |
|
310 | 326 | return combined_mgl, mgl_names
|
311 | 327 |
|
def get_summaries_and_tags_api_function(bill_id: str, bill_title: str, bill_text: str) -> dict:

    """
    Generates a summary and then tags for a legislative bill in a single call.

    The work is done in two sequential steps:
        1. get_summary_api_function is called with the bill id, title and text.
        2. If that step reports success and yields a non-empty summary, the summary
           (not the full bill text) is passed to get_tags_api_function_v2.

    Args:
        bill_id (str): The unique identifier of the bill.
        bill_title (str): The title of the bill.
        bill_text (str): The full text content of the bill.

    Returns:
        dict: A dictionary that always contains the keys below; any additional keys
        returned by the two underlying functions are merged in as well.
            - 'status' (int):
                *  1: Both summary and tags were generated.
                * -1: No usable summary was produced (also used when the summary
                      step reports success but the summary is empty).
                * any other non-1 value: passed through unchanged from the step
                  that failed (get_tags_api_function_v2 reports -2 on failure).
            - 'summary' (str): The generated summary, or '' if none was produced.
            - 'tags' (list): The generated tags, or [] if tagging failed or was
              not attempted.

    Note:
        - Tag generation is never attempted when the summary step fails.
        - When tagging fails, the summary that was already generated is still
          returned alongside the failing status.
    """

    # Pessimistic defaults: every key is present even if a step returns a partial dict.
    response_obj = {
        'status': -1,
        'summary': '',
        'tags': []
    }

    # Step 1: summarise the bill and merge the outcome into the response.
    summary_response = get_summary_api_function(bill_id, bill_title, bill_text)
    response_obj.update(summary_response)

    # The summary step reported a failure: hand its status back untouched.
    if response_obj['status'] != 1:
        return response_obj

    # A "successful" step with no summary is still a failure for this pipeline.
    # Previously this path returned status 1 with no summary and no tags.
    if not response_obj['summary']:
        response_obj['status'] = -1
        response_obj['summary'] = ''
        return response_obj

    # Step 2: tag the bill from its summary; this overwrites 'status' and 'tags'.
    tags_response = get_tags_api_function_v2(bill_id, bill_title, response_obj['summary'])
    response_obj.update(tags_response)
    return response_obj
| 384 | + |
312 | 385 | def get_summary_api_function(bill_id: str, bill_title: str, bill_text: str) -> dict:
|
313 | 386 |
|
314 | 387 | """
|
@@ -416,6 +489,54 @@ def get_tags_api_function(bill_id: str, bill_title: str, bill_text: str) -> dict
|
416 | 489 | else:
|
417 | 490 | return {'status': status_code, 'tags': results.response}
|
418 | 491 |
|
def get_tags_api_function_v2(bill_id: str, bill_title: str, bill_summary: str) -> dict:

    """
    Generates tags for a legislative bill from its summary rather than its full text.

    The inputs are wrapped in a BillDetails object (only bill_id, bill_title and
    summary are set; all other fields keep their defaults) and handed to
    get_tags_v2. The resulting status code and tags are repackaged as a plain dict.

    Args:
        bill_id (str): The unique identifier of the bill.
        bill_title (str): The title of the bill.
        bill_summary (str): A summarized version of the bill's content, e.g. the
            output of get_summary_api_function.

    Returns:
        dict: A dictionary containing:
            - 'status' (int): The status code returned by get_tags_v2
              (1 on success; any other value indicates failure).
            - 'tags' (list): The tags from the LLM result when status is 1,
              otherwise an empty list.
    """

    details = BillDetails(
        bill_id = bill_id,
        bill_title = bill_title,
        summary = bill_summary
    )
    status_code, llm_result = get_tags_v2(details)

    # Only read the LLM response on success; failures always yield an empty tag list.
    tags = llm_result.response if status_code == 1 else []
    return {'status': status_code, 'tags': tags}
| 539 | + |
419 | 540 | def get_llm_call_type(bill_details: BillDetails) -> str:
|
420 | 541 | """
|
421 | 542 | This function calculates number of tokens and decides on whether to use RAG or not. It returns a string output
|
@@ -532,6 +653,58 @@ def get_tags(bill_details: BillDetails) -> tuple[int, LLMResults]:
|
532 | 653 |
|
533 | 654 | return 1, tag_response
|
534 | 655 |
|
def get_tags_v2(bill_details: BillDetails) -> tuple[int, LLMResults]:

    """
    Helper function that generates tags for a bill from its summary.

    Args:
        bill_details (BillDetails): Object containing bill information. It must provide:
            - summary: Non-empty summarized content of the bill
            - bill_title: Title of the bill
          Its invoke_dict attribute is overwritten by this function.

    Returns:
        tuple[int, LLMResults]: A tuple containing:
            - int: Status code indicating the operation result:
                *  1: The LLM was called and its output was filtered into tags
                * -2: Required bill details are missing (no bill_title attribute,
                      or the summary is absent/empty); no LLM call is made
            - LLMResults: On success, the object returned by call_llm with its
              `response` replaced by the list of accepted tags. On failure, an
              empty LLMResults().

    Process:
        1. Validates that the required bill details are present
        2. Calls set_my_llm_cache() before invoking the LLM
        3. Stores the prompt inputs (title, summary wrapped in a Document, allowed
           tags) on bill_details.invoke_dict
        4. Calls call_llm with the TAGGING_PROMPT_USING_SUMMARIES template and the
           'small' call type
        5. Keeps only the extracted tags that also appear in new_tags

    Note:
        - Tags are de-duplicated through a set intersection, so the order of the
          returned list is unspecified
        - TAGGING_PROMPT_USING_SUMMARIES and new_tags are defined outside this function
    """

    # BillDetails declares `summary` with a default of '', so hasattr() alone can
    # never detect a missing summary; check the value itself instead.
    if not hasattr(bill_details, "bill_title") or not getattr(bill_details, "summary", None):
        return -2, LLMResults()

    set_my_llm_cache()
    llm_call_type = 'small'
    query = TAGGING_PROMPT_USING_SUMMARIES
    bill_details.invoke_dict = {
        'bill_title': bill_details.bill_title,
        'context': [Document(page_content = f"```{bill_details.summary}```")],
        'tags': new_tags
    }

    tag_response = call_llm(bill_details, query, llm_call_type)

    # Discard anything the model produced that is not one of the allowed tags.
    extracted_tags = extract_categories_tags(tag_response.response)
    tag_response.response = list(set(extracted_tags) & set(new_tags))
    return 1, tag_response
| 707 | + |
535 | 708 | def extract_categories_tags(response: str) -> list:
|
536 | 709 |
|
537 | 710 | """
|
|
0 commit comments