Skip to content
Open
8 changes: 0 additions & 8 deletions .env.sample

This file was deleted.

220 changes: 150 additions & 70 deletions docetl/operations/reduce.py

Large diffs are not rendered by default.

286 changes: 217 additions & 69 deletions docetl/operations/utils/api.py

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions docetl/operations/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class LLMResult(BaseModel):
response: Any
total_cost: float
validated: bool
updated_state: Optional[dict[str, Any]]


class InvalidOutputError(Exception):
Expand Down Expand Up @@ -74,7 +75,8 @@ def truncate_messages(
model_input_context_length = model_cost.get(model.split("/")[-1], {}).get(
"max_input_tokens", 8192
)
total_tokens = sum(count_tokens(json.dumps(msg), model) for msg in messages)
total_tokens = sum(count_tokens(json.dumps(msg), model)
for msg in messages)

if total_tokens <= model_input_context_length - 100:
return messages
Expand All @@ -95,14 +97,15 @@ def truncate_messages(
truncated_encoded = (
encoded_content[: mid_point - tokens_to_remove // 2]
+ encoder.encode(f" ... [{tokens_to_remove} tokens truncated] ... ")
+ encoded_content[mid_point + tokens_to_remove // 2 :]
+ encoded_content[mid_point + tokens_to_remove // 2:]
)
truncated_content = encoder.decode(truncated_encoded)
total_tokens = len(encoded_content)

warning_type = "User" if not from_agent else "Agent"
rprint(
f"[yellow]{warning_type} Warning:[/yellow] Cutting {tokens_to_remove} tokens from a prompt with {total_tokens} tokens..."
f"[yellow]{warning_type} Warning:[/yellow] Cutting {
tokens_to_remove} tokens from a prompt with {total_tokens} tokens..."
)

longest_message["content"] = truncated_content
Expand Down
1 change: 1 addition & 0 deletions intermediate_results/data_processing/get_debates.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"presidential_candidates": ["Biden", "Trump", "Harris", "Clinton", "Perot", "Ford", "Carter", "Bush", "Dole", "Obama", "McCain", "Reagan", "Pence", "Ryan"], "key_points": [{"candidate_name": "Biden", "points": ["working on reducing inflation", "creating jobs", "strengthening healthcare", "addressing social issues such as abortion rights", "immigration", "climate change", "focusing on unity", "economic recovery", "comprehensive healthcare reform", "fighting for healthcare access and women's rights", "rebuilding the economy for the middle class"]}, {"candidate_name": "Trump", "points": ["stressing the need for a strong economy", "enforcing border security", "opposing regulations", "advocating for the Second Amendment", "expressing a nationalistic approach to foreign policy", "businessman focusing on immigration reform", "job creation through tax incentives", "strong stance on national security and trade policies", "stressing America-first policies", "tax cuts", "reduced immigration", "strong military", "advocating for deregulation", "job growth", "renegotiating trade deals to protect American jobs", "advocating for economic recovery", "tax cuts for the wealthy", "maintaining law and order across America"]}, {"candidate_name": "Harris", "points": ["focusing on social justice", "the need for healthcare reform", "promoting the rights of women", "tackling climate change", "representing an inclusive vision for the United States", "advocating for healthcare reform", "emphasizing inclusivity in governance", "supporting women's reproductive rights", "affordable healthcare", "investment in renewable energy and infrastructure"]}, {"candidate_name": "Clinton", "points": ["emphasizing the importance of women\u2019s rights", "healthcare access", "economic fairness", "diplomacy in foreign policy", "criticizing Trump's rhetoric", "advocating for a middle-class tax cut", "emphasizing healthcare reform", "welfare-to-work initiatives", "pushing for a balanced economy", "advocating for healthcare and education reforms", "emphasizing women's rights and social justice", "focusing on reduced taxes for the middle class", "economic growth through investment", "health care reform", "emphasizing character and trust as key leadership qualities", "using diplomacy in foreign policy while criticizing the opposition"]}, {"candidate_name": "Perot", "points": ["focusing on the importance of family values", "the American dream", "education reform", "national debt responsibility", "advocating for the people's voice in government policies"]}, {"candidate_name": "Ford", "points": ["appointed Secretary Kissinger", "negotiated significant treaties", "emphasized a strong defense budget", "faced criticism for his administration's handling of foreign policy issues"]}, {"candidate_name": "Carter", "points": ["emphasized the need for leadership change", "criticized Ford's foreign policy", "promoted social justice", "healthcare reform", "focusing on social justice", "economic recovery while addressing foreign policy issues", "especially regarding military action and peace negotiations"]}, {"candidate_name": "Bush", "points": ["President during the Iraq War", "emphasized national security", "faced criticism for economic policies", "focused on tax cuts to spur growth", "promoted free enterprise", "reducing taxes and increasing job growth", "advocating for military strength", "addressing international relations with an assertive foreign policy"]}, {"candidate_name": "Dole", "points": ["prioritized tax cuts for the middle class", "opposed big government approaches", "pushed for school choice", "healthcare reforms", "strong military presence"]}, {"candidate_name": "Obama", "points": ["emphasizing healthcare reforms", "job creation through middle-class tax cuts", "an ethical foreign policy approach", "contrasting with McCain's military strategies", "emphasizing healthcare reform", "job creation through middle-class tax cuts", "foreign policy based on ethical engagement and diplomacy"]}, {"candidate_name": "McCain", "points": ["highlighting military experience", "advocating for a strong military response to terrorism", "promoting economic reform with tax cuts", "criticizing Obama on foreign policy issues"]}, {"candidate_name": "Reagan", "points": ["emphasizes national strength", "a last resort approach to military conflict", "tax cuts", "deregulation", "focusing on economic growth and family values"]}, {"candidate_name": "Pence", "points": ["maintaining a pro-life stance", "emphasizing law and order", "promoting America's energy independence through fossil fuels"]}, {"candidate_name": "Ryan", "points": ["advocating for tax cuts", "entitlement reforms", "a focus on balancing the budget"]}], "_counts_prereduce_get_debates": 49}]
1 change: 1 addition & 0 deletions intermediate_results/data_processing/get_diseases.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"symptoms": [{"disease_name": "Influenza", "symptoms": ["fever", "chills", "sore throat", "muscle aches", "fatigue", "cough", "congestion"]}, {"disease_name": "Common Cold", "symptoms": ["runny nose", "sneezing", "sore throat", "mild symptoms"]}, {"disease_name": "COVID-19", "symptoms": ["fever", "cough", "fatigue", "difficulty breathing", "loss of taste or smell", "body aches"]}, {"disease_name": "Bronchitis", "symptoms": ["persistent cough", "mucus production", "fatigue"]}, {"disease_name": "Diabetes", "symptoms": ["extreme thirst", "frequent urination", "blurred vision", "weight loss for Type 1", "fatigue", "excessive hunger", "slow wound healing for Type 2"]}, {"disease_name": "Hyperthyroidism", "symptoms": ["excessive sweating", "weight loss", "increased appetite"]}, {"disease_name": "Malaria", "symptoms": ["cyclical fevers", "chills", "headaches", "nausea", "vomiting", "extreme fatigue"]}, {"disease_name": "Dengue Fever", "symptoms": ["high fever", "severe headaches", "joint and muscle pain", "rash", "bleeding"]}, {"disease_name": "Asthma", "symptoms": ["shortness of breath", "wheezing", "coughing", "chest tightness"]}, {"disease_name": "COPD", "symptoms": ["difficulty breathing", "wheezing", "chronic cough"]}, {"disease_name": "Cancer", "symptoms": ["unexplained weight loss", "fatigue", "persistent pain", "changes in skin appearance", "lumps or unusual growths"]}, {"disease_name": "Parkinson's disease", "symptoms": ["tremors", "muscle stiffness", "slow movement", "difficulty balancing", "cognitive changes", "depression", "difficulty with speech and swallowing"]}, {"disease_name": "Alzheimer's disease", "symptoms": ["memory loss", "confusion", "difficulties with thinking and problem-solving"]}, {"disease_name": "Epilepsy", "symptoms": ["recurrent seizures", "convulsions", "loss of consciousness", "unusual sensations"]}, {"disease_name": "Stroke", "symptoms": ["sudden numbness", "weakness", "confusion", "trouble speaking", "difficulty walking"]}, {"disease_name": "Multiple Sclerosis", "symptoms": ["numbness or weakness in limbs", "vision problems", "balance issues", "cognitive changes"]}, {"disease_name": "Guillain-Barr\u00e9 Syndrome", "symptoms": ["weakness", "tingling", "paralysis starting in feet and legs"]}, {"disease_name": "Tuberculosis", "symptoms": ["persistent cough", "chest pain", "coughing up blood", "fever", "night sweats", "weight loss"]}, {"disease_name": "Pneumonia", "symptoms": ["cough", "fever", "chills", "difficulty breathing"]}, {"disease_name": "Rheumatoid Arthritis", "symptoms": ["joint pain", "swelling", "morning stiffness", "fatigue"]}, {"disease_name": "Systemic Lupus Erythematosus", "symptoms": []}, {"disease_name": "Huntington's Disease", "symptoms": ["progressive motor dysfunction", "cognitive decline", "psychiatric symptoms"]}, {"disease_name": "Amyotrophic Lateral Sclerosis", "symptoms": ["muscle weakness", "respiratory failure"]}, {"disease_name": "Cystic Fibrosis", "symptoms": ["respiratory issues", "pancreatic insufficiency"]}], "top_disease_mentioned": "Diabetes", "most_harmful_disease": "Parkinson's disease", "explanation_of_most_harmful_disease": "A progressive neurodegenerative disorder characterized by tremors, bradykinesia, rigidity, and postural instability. Associated with cognitive decline, depression, and sleep disturbances.", "_counts_prereduce_get_diseases": 107}]
1 change: 1 addition & 0 deletions intermediate_results/data_processing/get_fruits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"fruits_or_veggies_list": [{"name": "persimmon", "count": 2}, {"name": "lychee", "count": 2}, {"name": "jackfruit", "count": 7}, {"name": "dragon fruit", "count": 4}, {"name": "loquat", "count": 2}, {"name": "mangosteen", "count": 4}, {"name": "blackcurrant", "count": 2}, {"name": "camu camu", "count": 4}, {"name": "acerola cherry", "count": 2}, {"name": "durian", "count": 2}, {"name": "jabuticaba", "count": 4}, {"name": "pomegranate", "count": 2}, {"name": "breadfruit", "count": 2}, {"name": "tamarillo", "count": 2}, {"name": "goji berry", "count": 2}, {"name": "baobab", "count": 2}, {"name": "golden berry", "count": 2}, {"name": "longan", "count": 2}, {"name": "rambutan", "count": 2}, {"name": "noni", "count": 2}, {"name": "starfruit", "count": 2}, {"name": "black sapote", "count": 2}, {"name": "chayote", "count": 2}, {"name": "miracle fruit", "count": 2}, {"name": "a\u00e7a\u00ed berry", "count": 4}, {"name": "soursop", "count": 4}, {"name": "African horned melon", "count": 4}, {"name": "tamarind", "count": 4}, {"name": "cherimoya", "count": 4}, {"name": "maracuja", "count": 4}, {"name": "pulasan", "count": 4}, {"name": "lucuma", "count": 4}, {"name": "maqui berry", "count": 4}, {"name": "cupuacu", "count": 4}, {"name": "gooseberries", "count": 4}, {"name": "bilberries", "count": 4}, {"name": "mangaba", "count": 4}, {"name": "ugli fruit", "count": 4}, {"name": "salak", "count": 4}, {"name": "elderberries", "count": 2}, {"name": "persimmons", "count": 2}, {"name": "langsat", "count": 2}, {"name": "medlar", "count": 2}, {"name": "nance", "count": 2}, {"name": "pawpaw", "count": 2}, {"name": "rose apple", "count": 2}, {"name": "sapodilla", "count": 2}, {"name": "surinam cherry", "count": 2}, {"name": "velvet apple", "count": 2}, {"name": "white mulberries", "count": 2}, {"name": "yellow mombin", "count": 2}, {"name": "ziziphus", "count": 2}, {"name": "apple", "count": 2}], "top_5_ranking": ["jackfruit", "a\u00e7a\u00ed berry", "soursop", "tamarind", "jabuticaba"], "_counts_prereduce_get_fruits": 8}]
Loading