|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Generate mock reactions data for channel_messages CSV files. |
| 4 | +
|
| 5 | +This script takes an input CSV containing channel_messages data (without reactions) |
| 6 | +and outputs a new CSV with a generated 'reactions' column containing realistic mock data. |
| 7 | +""" |
| 8 | + |
| 9 | +import argparse |
| 10 | +import random |
| 11 | +from pathlib import Path |
| 12 | + |
| 13 | +import pandas as pd |
| 14 | + |
| 15 | + |
# Reaction names that generate_reactions() may attach to a message.
REACTION_NAMES = [
    "approved",
    "merged",
    "raised_hands",
    "pray",
    "laugh",
    "ty",
    "this",
    "point_up",
    "white_check_mark",
]
| 27 | + |
# Pool of mock user IDs from which the users of each generated reaction
# are sampled.
MOCK_SLACK_USER_IDS = [
    "U1FM88J9J",
    "UNC2V4ZD7",
    "U7W96XW2A",
    "US3QTX4Z1",
    "UQY0NPYWN",
    "UR257U86O",
    "UG6UG9470",
    "U0KG42Q0P",
    "UEA8H90WN",
    "UNNH8IW78",
    "U3GY5K9EQ",
    "UL9VWQ9S2",
    "UORS33I12",
    "U8S28EIM6",
    "UWL5JX23M",
    "UV77JEL7Q",
    "U2M260029",
    "UFF1HYHJ2",
    "UDC0ZA535",
    "UB81I8027",
    "U3HY0BS3W",
    "UL92954KA",
    "UZ5125Y28",
    "U168N7Y9G",
    "UE83A3954",
    "UP9T7VJBF",
    "UK839UQ5R",
    "UCL2RJ89X",
    "UFUV7GPHM",
    "UNVWFC5Q1",
    "UY640917T",
    "UQSF747QR",
    "U34AX16M0",
    "UWBXBIUB7",
    "U2QXC4ZNG",
    "UN979RN92",
    "UH749DC04",
    "UL2989703",
    "U4095YWYD",
    "U24SJD0U3",
    "U7ZP08017",
    "U0GBC0N90",
    "UU6K92DUR",
    "UNY6WDI60",
    "UJ65P6F44",
    "U3TE14ET2",
    "U96N45CL2",
    "UK4V3ON2O",
    "UKA77QAYY",
    "U81OT695K",
    "UPT27H737",
    "UOBK5OFZ0",
    "U0EIW7DOX",
    "UZ271WU40",
    "UYVM94HE1",
    "UEZMYV74R",
    "UYEP6YY91",
    "U9E255NN9",
    "ULRS0HVJ3",
    "U378U76S4",
    "UK756E555",
    "U6P582A60",
    "UL96240J4",
    "UU95TZPYP",
    "U306GEBY8",
    "U549234TU",
    "UIX8TSI0Y",
    "UE0WA7608",
    "U9YFOYM78",
    "UZ90K3JDW",
    "U5V38TNV1",
    "U0381889F",
    "UF39WLXAT",
    "UHGL04640",
    "UA7M10C0O",
    "UB51ZBPGJ",
    "UNS22LGIY",
    "UN0J0I7F0",
    "UH1039P7I",
    "UO513ARUP",
    "U3O3NUM10",
    "UKVN1ZI1Z",
    "ULAN263E4",
    "U7PX1XVTE",
    "UIDQ8QXI5",
    "UV4M04Y17",
    "U0D6182AR",
    "UC3Z15JC3",
    "U84A3Q05B",
    "UWW10VL06",
    "USI1BH3K3",
    "U2XQ3JJAN",
    "UYJ494VWS",
    "ULWL32B29",
    "U91Q0A72O",
    "U85PB3J63",
    "UA3IV9419",
    "UB9LLF16J",
    "U3U77Y17K",
    "U08ITMEA6",
]
| 130 | + |
| 131 | + |
def generate_reactions() -> list[dict]:
    """
    Generate a random list of reactions for a message.

    Returns a list of reaction dicts, each containing:
      - name: reaction name from REACTION_NAMES (each name at most once)
      - users: list of distinct user IDs sampled from MOCK_SLACK_USER_IDS
      - count: number of users (equals len(users))

    An empty list is returned when zero reaction types are drawn, or when
    REACTION_NAMES or MOCK_SLACK_USER_IDS is empty.
    """
    # Relative likelihood of drawing 0, 1, 2, ... reaction types; weighted
    # towards 0-2 reactions per message.
    type_weights = [40, 25, 15, 10, 5, 3, 1, 0.5, 0.3, 0.2]
    # Relative likelihood of 1, 2, 3, ... users on a single reaction;
    # weighted towards fewer users per reaction.
    user_weights = [40, 25, 15, 10, 5, 3, 1, 0.5, 0.3, 0.2]

    # random.choices() needs population and weights of equal length, and
    # random.sample() cannot draw more items than exist. Clamp both draws to
    # what the constants can supply so editing either list does not raise.
    max_types = min(len(REACTION_NAMES), len(type_weights) - 1)
    max_users = min(len(MOCK_SLACK_USER_IDS), len(user_weights))
    if max_types == 0 or max_users == 0:
        return []

    num_reaction_types = random.choices(
        range(max_types + 1),
        weights=type_weights[:max_types + 1],
        k=1,
    )[0]

    if num_reaction_types == 0:
        return []

    # Select which reaction types to include (without repetition)
    selected_reactions = random.sample(REACTION_NAMES, num_reaction_types)

    reactions = []
    for reaction_name in selected_reactions:
        num_users = random.choices(
            range(1, max_users + 1),
            weights=user_weights[:max_users],
            k=1,
        )[0]

        # Select distinct random users for this reaction
        users = random.sample(MOCK_SLACK_USER_IDS, num_users)

        reactions.append({
            'name': reaction_name,
            'users': users,
            'count': len(users),
        })

    return reactions
| 175 | + |
| 176 | + |
def process_csv(input_path: Path, output_path: Path) -> None:
    """
    Copy a CSV to a new file with a generated 'reactions' column.

    The input is loaded with pandas; one independent generate_reactions()
    result is produced per row and stored as its Python string form
    (str() of the list, not JSON). Any existing 'reactions' column is
    replaced. The result is written without the DataFrame index, and a
    short summary is printed to stdout.
    """
    frame = pd.read_csv(input_path)
    row_count = len(frame)

    # One freshly generated reactions list per row, serialized with str().
    serialized = []
    for _ in range(row_count):
        serialized.append(str(generate_reactions()))
    frame['reactions'] = serialized

    frame.to_csv(output_path, index=False)

    print(f"Processed {row_count} rows")
    print(f"Output written to: {output_path}")
| 192 | + |
| 193 | + |
def main(argv=None) -> int:
    """
    Command-line entry point: parse arguments and write the output CSV.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default (None) makes argparse read
            sys.argv[1:], so existing callers are unaffected.

    Returns:
        0 on success, 1 when the input file does not exist.
    """
    import sys  # local import: only needed for the error stream below

    parser = argparse.ArgumentParser(
        description="Generate mock reactions data for channel_messages CSV files."
    )
    parser.add_argument(
        "input_file",
        type=Path,
        help="Path to input CSV file containing channel_messages data"
    )
    parser.add_argument(
        "-o", "--output",
        type=Path,
        default=None,
        help="Path to output CSV file (default: input_file with '_with_reactions' suffix)"
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=None,
        help="Random seed for reproducible results"
    )

    args = parser.parse_args(argv)

    # Validate input file exists; report on stderr so the message is not
    # mixed into stdout when the script is used in a pipeline.
    if not args.input_file.exists():
        print(f"Error: Input file not found: {args.input_file}", file=sys.stderr)
        return 1

    # Default output sits next to the input: "<stem>_with_reactions<suffix>"
    if args.output is None:
        output_path = args.input_file.with_stem(f"{args.input_file.stem}_with_reactions")
    else:
        output_path = args.output

    # Seed the `random` module only when asked, so runs are reproducible
    # on request and random otherwise.
    if args.seed is not None:
        random.seed(args.seed)

    process_csv(args.input_file, output_path)

    return 0
| 237 | + |
| 238 | + |
if __name__ == "__main__":
    # Use SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    raise SystemExit(main())
| 241 | + |
0 commit comments