@@ -24,44 +24,126 @@ def demonstrate_batch_processing():
24
24
logger .info ("Demonstrating batch processing..." )
25
25
26
26
# Initialize classifier
27
- classifier = AdaptiveClassifier ("bert -base-uncased " )
27
+ classifier = AdaptiveClassifier ("distilbert/distilbert -base-cased " )
28
28
29
29
# Create a larger dataset
30
30
texts = []
31
31
labels = []
32
32
33
33
# Simulate customer feedback dataset
34
34
feedback_data = [
35
+ # Positive feedback
35
36
("The product is amazing!" , "positive" ),
37
+ ("Exceeded all my expectations, truly worth every penny" , "positive" ),
38
+ ("Customer service was incredibly helpful and responsive" , "positive" ),
39
+ ("Best purchase I've made this year" , "positive" ),
40
+ ("The quality is outstanding" , "positive" ),
41
+ ("Shipping was super fast and packaging was perfect" , "positive" ),
42
+ ("Really impressed with the durability" , "positive" ),
43
+ ("Great value for money" , "positive" ),
44
+ ("The features are exactly what I needed" , "positive" ),
45
+ ("Easy to use and very intuitive" , "positive" ),
46
+ ("Fantastic product, will definitely buy again" , "positive" ),
47
+ ("Love how lightweight and portable it is" , "positive" ),
48
+ ("The installation process was seamless" , "positive" ),
49
+ ("Brilliant design and functionality" , "positive" ),
50
+ ("Top-notch quality and performance" , "positive" ),
51
+
52
+ # Negative feedback
36
53
("Worst experience ever" , "negative" ),
54
+ ("Product broke after just one week" , "negative" ),
55
+ ("Customer support never responded to my emails" , "negative" ),
56
+ ("Completely disappointed with the quality" , "negative" ),
57
+ ("Not worth the money at all" , "negative" ),
58
+ ("Arrived damaged and return process was horrible" , "negative" ),
59
+ ("The instructions were impossible to follow" , "negative" ),
60
+ ("Poor build quality, feels cheap" , "negative" ),
61
+ ("Missing essential features that were advertised" , "negative" ),
62
+ ("Terrible battery life" , "negative" ),
63
+ ("Keeps malfunctioning randomly" , "negative" ),
64
+ ("The worst customer service I've ever experienced" , "negative" ),
65
+ ("Save your money and avoid this product" , "negative" ),
66
+ ("Doesn't work as advertised" , "negative" ),
67
+ ("Had to return it immediately" , "negative" ),
68
+
69
+ # Neutral feedback
37
70
("It works as expected" , "neutral" ),
38
- # Add more examples...
71
+ ("Average product, nothing special" , "neutral" ),
72
+ ("Does the job, but could be better" , "neutral" ),
73
+ ("Reasonable price for what you get" , "neutral" ),
74
+ ("Some good features, some bad ones" , "neutral" ),
75
+ ("Pretty standard quality" , "neutral" ),
76
+ ("Not bad, not great" , "neutral" ),
77
+ ("Meets basic requirements" , "neutral" ),
78
+ ("Similar to other products in this category" , "neutral" ),
79
+ ("Acceptable performance for the price" , "neutral" ),
80
+ ("Middle-of-the-road quality" , "neutral" ),
81
+ ("Functions adequately" , "neutral" ),
82
+ ("Basic functionality works fine" , "neutral" ),
83
+ ("Got what I paid for" , "neutral" ),
84
+ ("Standard delivery time and service" , "neutral" ),
85
+
86
+ # Technical feedback
87
+ ("Getting error code 404 when trying to sync" , "technical" ),
88
+ ("App crashes after latest update" , "technical" ),
89
+ ("Can't connect to WiFi despite correct password" , "technical" ),
90
+ ("Battery drains even when device is off" , "technical" ),
91
+ ("Screen freezes during startup" , "technical" ),
92
+ ("Bluetooth pairing fails consistently" , "technical" ),
93
+ ("System shows unrecognized device error" , "technical" ),
94
+ ("Software keeps reverting to previous version" , "technical" ),
95
+ ("Memory full error after minimal usage" , "technical" ),
96
+ ("Device overheats during normal operation" , "technical" ),
97
+ ("USB port not recognizing connections" , "technical" ),
98
+ ("Network connectivity drops randomly" , "technical" ),
99
+ ("Authentication failed error on login" , "technical" ),
100
+ ("Sync process stuck at 99%" , "technical" ),
101
+ ("Database connection timeout error" , "technical" )
39
102
]
40
103
104
+ # Number of times to replicate each example
105
+ num_replications = 10 # This will create 10x more data
106
+
41
107
for text , label in feedback_data :
42
- texts .extend ([text ] * 10 ) # Replicate each example 10 times for demo
43
- labels .extend ([label ] * 10 )
108
+ # Add multiple copies of each example
109
+ texts .extend ([text ] * num_replications )
110
+ labels .extend ([label ] * num_replications )
111
+
112
+ logger .info (f"Total examples: { len (texts )} " )
113
+ logger .info (f"Examples per class: { sum (1 for l in labels if l == 'positive' )} /{ sum (1 for l in labels if l == 'negative' )} /"
114
+ f"{ sum (1 for l in labels if l == 'neutral' )} /{ sum (1 for l in labels if l == 'technical' )} " )
44
115
45
116
# Create dataset and dataloader
46
117
dataset = TextDataset (texts , labels )
47
- dataloader = DataLoader (dataset , batch_size = 32 , shuffle = True )
118
+ batch_size = 8
119
+ dataloader = DataLoader (dataset , batch_size = batch_size , shuffle = True )
120
+
121
+ # Calculate expected number of batches
122
+ expected_batches = len (dataset ) // batch_size + (1 if len (dataset ) % batch_size != 0 else 0 )
123
+ logger .info (f"Expected number of batches: { expected_batches } " )
48
124
49
125
# Process in batches
50
126
start_time = time .time ()
51
127
for batch_idx , (batch_texts , batch_labels ) in enumerate (dataloader ):
52
128
classifier .add_examples (batch_texts , batch_labels )
53
- if batch_idx % 10 == 0 :
54
- logger .info (f"Processed batch { batch_idx } " )
129
+ if batch_idx % 5 == 0 : # Log every 5 batches
130
+ logger .info (f"Processed batch { batch_idx + 1 } /{ expected_batches } " )
131
+
132
+ # Optional: print batch sizes to verify
133
+ if batch_idx in [0 , expected_batches // 2 , expected_batches - 1 ]: # Print first, middle, and last batch
134
+ logger .info (f"Batch { batch_idx + 1 } size: { len (batch_texts )} " )
55
135
56
- logger .info (f"Processing time: { time .time () - start_time :.2f} seconds" )
136
+ processing_time = time .time () - start_time
137
+ logger .info (f"Processing time: { processing_time :.2f} seconds" )
138
+ logger .info (f"Average time per batch: { processing_time / expected_batches :.2f} seconds" )
57
139
58
140
return classifier
59
141
60
142
def demonstrate_continuous_learning ():
61
143
"""Example of continuous learning with performance monitoring"""
62
144
logger .info ("Demonstrating continuous learning..." )
63
145
64
- classifier = AdaptiveClassifier ("bert -base-uncased " )
146
+ classifier = AdaptiveClassifier ("distilbert/distilbert -base-cased " )
65
147
66
148
# Initial training
67
149
initial_texts = [
@@ -118,7 +200,7 @@ def evaluate_performance(test_texts: List[str], test_labels: List[str]) -> float
118
200
def demonstrate_persistence ():
119
201
# 1. Create and train initial classifier
120
202
print ("Phase 1: Creating and training initial classifier" )
121
- classifier = AdaptiveClassifier ("bert -base-uncased " )
203
+ classifier = AdaptiveClassifier ("distilbert/distilbert -base-cased " )
122
204
123
205
# Add some initial examples
124
206
initial_texts = [
@@ -170,38 +252,74 @@ def demonstrate_multi_language():
170
252
logger .info ("Demonstrating multi-language support..." )
171
253
172
254
# Use a multilingual model
173
- classifier = AdaptiveClassifier ("bert -base-multilingual-uncased " )
255
+ classifier = AdaptiveClassifier ("distilbert/distilbert -base-multilingual-cased " )
174
256
175
- # Add examples in different languages
176
257
texts = [
177
- # English
258
+ # English - Positive
178
259
"This is great" ,
260
+ "I love this product" ,
261
+ "Amazing experience" ,
262
+ "Excellent service" ,
263
+ "Best purchase ever" ,
264
+ "Highly recommended" ,
265
+ "Really impressive quality" ,
266
+ "Fantastic results" ,
267
+
268
+ # English - Negative
179
269
"This is terrible" ,
180
- # Spanish
270
+ "Worst experience ever" ,
271
+ "Don't waste your money" ,
272
+ "Very disappointed" ,
273
+ "Poor quality product" ,
274
+ "Absolutely horrible" ,
275
+ "Complete waste of time" ,
276
+ "Not worth buying" ,
277
+
278
+ # Spanish - Positive
181
279
"Esto es excelente" ,
280
+ "Me encanta este producto" ,
281
+ "Una experiencia maravillosa" ,
282
+ "Servicio excepcional" ,
283
+ "La mejor compra" ,
284
+ "Muy recomendable" ,
285
+ "Calidad impresionante" ,
286
+ "Resultados fantásticos" ,
287
+
288
+ # Spanish - Negative
182
289
"Esto es terrible" ,
183
- # French
184
- "C'est excellent" ,
185
- "C'est terrible"
290
+ "La peor experiencia" ,
291
+ "No malgastes tu dinero" ,
292
+ "Muy decepcionado" ,
293
+ "Producto de mala calidad" ,
294
+ "Absolutamente horrible" ,
295
+ "Pérdida total de tiempo" ,
296
+ "No vale la pena comprarlo" ,
186
297
]
187
-
188
- labels = ["positive" , "negative" ] * 3
298
+
299
+ labels = ["positive" ] * 8 + ["negative" ] * 8 \
300
+ + ["positive" ] * 8 + ["negative" ] * 8
189
301
190
302
classifier .add_examples (texts , labels )
191
303
192
304
# Test in different languages
193
305
test_texts = [
194
- "This is wonderful" , # English
195
- "Esto es maravilloso" , # Spanish
196
- "C'est merveilleux" # French
306
+ # English
307
+ "This is wonderful" , # Positive
308
+ "This is terrible" , # Negative
309
+
310
+ # Spanish
311
+ "Esto es maravilloso" , # Positive
312
+ "Esto es terrible" , # Negative
197
313
]
198
-
314
+
315
+ # Print test results
316
+ print ("\n Testing predictions in multiple languages:" )
199
317
for text in test_texts :
200
318
predictions = classifier .predict (text )
201
- logger . info (f"\n Text: { text } " )
202
- logger . info ("Predictions:" )
319
+ print (f"\n Text: { text } " )
320
+ print ("Predictions:" )
203
321
for label , score in predictions :
204
- logger . info (f"{ label } : { score :.4f} " )
322
+ print (f"{ label } : { score :.4f} " )
205
323
206
324
return classifier
207
325
0 commit comments