[SPARK-9490] [DOCS] [MLLIB] MLlib evaluation metrics guide example py…

…thon code uses deprecated print statement Use print(x) not print x for Python 3 in eval examples CC sethah mengxr -- just wanted to close this out before 1.5 Author: Sean Owen <[email protected]> Closes apache#7822 from srowen/SPARK-9490 and squashes the following commits: 01abeba [Sean Owen] Change "print x" to "print(x)" in the rest of the docs too bd7f7fb [Sean Owen] Use print(x) not print x for Python 3 in eval examples
EugenCepoi · Jul 31, 2015 · 873ab0f · 873ab0f
1 parent 815c824
commit 873ab0f
Show file tree

Hide file tree

Showing 7 changed files with 50 additions and 50 deletions.
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
@@ -561,7 +561,7 @@ test = sc.parallelize([(4L, "spark i j k"),
 prediction = model.transform(test)
 selected = prediction.select("id", "text", "prediction")
 for row in selected.collect():
-    print row
+    print(row)
 
 sc.stop()
 {% endhighlight %}

diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md
@@ -302,10 +302,10 @@ predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp
 metrics = BinaryClassificationMetrics(predictionAndLabels)
 
 # Area under precision-recall curve
-print "Area under PR = %s" % metrics.areaUnderPR
+print("Area under PR = %s" % metrics.areaUnderPR)
 
 # Area under ROC curve
-print "Area under ROC = %s" % metrics.areaUnderROC
+print("Area under ROC = %s" % metrics.areaUnderROC)
 
 {% endhighlight %}
 
@@ -606,24 +606,24 @@ metrics = MulticlassMetrics(predictionAndLabels)
 precision = metrics.precision()
 recall = metrics.recall()
 f1Score = metrics.fMeasure()
-print "Summary Stats"
-print "Precision = %s" % precision
-print "Recall = %s" % recall
-print "F1 Score = %s" % f1Score
+print("Summary Stats")
+print("Precision = %s" % precision)
+print("Recall = %s" % recall)
+print("F1 Score = %s" % f1Score)
 
 # Statistics by class
 labels = data.map(lambda lp: lp.label).distinct().collect()
 for label in sorted(labels):
-    print "Class %s precision = %s" % (label, metrics.precision(label))
-    print "Class %s recall = %s" % (label, metrics.recall(label))
-    print "Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0))
+    print("Class %s precision = %s" % (label, metrics.precision(label)))
+    print("Class %s recall = %s" % (label, metrics.recall(label)))
+    print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
 
 # Weighted stats
-print "Weighted recall = %s" % metrics.weightedRecall
-print "Weighted precision = %s" % metrics.weightedPrecision
-print "Weighted F(1) Score = %s" % metrics.weightedFMeasure()
-print "Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5)
-print "Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate
+print("Weighted recall = %s" % metrics.weightedRecall)
+print("Weighted precision = %s" % metrics.weightedPrecision)
+print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
+print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
+print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
 {% endhighlight %}
 
 </div>
@@ -881,28 +881,28 @@ scoreAndLabels = sc.parallelize([
 metrics = MultilabelMetrics(scoreAndLabels)
 
 # Summary stats
-print "Recall = %s" % metrics.recall()
-print "Precision = %s" % metrics.precision()
-print "F1 measure = %s" % metrics.f1Measure()
-print "Accuracy = %s" % metrics.accuracy
+print("Recall = %s" % metrics.recall())
+print("Precision = %s" % metrics.precision())
+print("F1 measure = %s" % metrics.f1Measure())
+print("Accuracy = %s" % metrics.accuracy)
 
 # Individual label stats
 labels = scoreAndLabels.flatMap(lambda x: x[1]).distinct().collect()
 for label in labels:
-    print "Class %s precision = %s" % (label, metrics.precision(label))
-    print "Class %s recall = %s" % (label, metrics.recall(label))
-    print "Class %s F1 Measure = %s" % (label, metrics.f1Measure(label))
+    print("Class %s precision = %s" % (label, metrics.precision(label)))
+    print("Class %s recall = %s" % (label, metrics.recall(label)))
+    print("Class %s F1 Measure = %s" % (label, metrics.f1Measure(label)))
 
 # Micro stats
-print "Micro precision = %s" % metrics.microPrecision
-print "Micro recall = %s" % metrics.microRecall
-print "Micro F1 measure = %s" % metrics.microF1Measure
+print("Micro precision = %s" % metrics.microPrecision)
+print("Micro recall = %s" % metrics.microRecall)
+print("Micro F1 measure = %s" % metrics.microF1Measure)
 
 # Hamming loss
-print "Hamming loss = %s" % metrics.hammingLoss
+print("Hamming loss = %s" % metrics.hammingLoss)
 
 # Subset accuracy
-print "Subset accuracy = %s" % metrics.subsetAccuracy
+print("Subset accuracy = %s" % metrics.subsetAccuracy)
 
 {% endhighlight %}
 
@@ -1283,10 +1283,10 @@ scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])
 metrics = RegressionMetrics(scoreAndLabels)
 
 # Root mean sqaured error
-print "RMSE = %s" % metrics.rootMeanSquaredError
+print("RMSE = %s" % metrics.rootMeanSquaredError)
 
 # R-squared
-print "R-squared = %s" % metrics.r2
+print("R-squared = %s" % metrics.r2)
 
 {% endhighlight %}
 
@@ -1479,17 +1479,17 @@ valuesAndPreds = parsedData.map(lambda p: (float(model.predict(p.features)), p.l
 metrics = RegressionMetrics(valuesAndPreds)
 
 # Squared Error
-print "MSE = %s" % metrics.meanSquaredError
-print "RMSE = %s" % metrics.rootMeanSquaredError
+print("MSE = %s" % metrics.meanSquaredError)
+print("RMSE = %s" % metrics.rootMeanSquaredError)
 
 # R-squared
-print "R-squared = %s" % metrics.r2
+print("R-squared = %s" % metrics.r2)
 
 # Mean absolute error
-print "MAE = %s" % metrics.meanAbsoluteError
+print("MAE = %s" % metrics.meanAbsoluteError)
 
 # Explained variance
-print "Explained variance = %s" % metrics.explainedVariance
+print("Explained variance = %s" % metrics.explainedVariance)
 
 {% endhighlight %}
 

diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md
@@ -221,7 +221,7 @@ model = word2vec.fit(inp)
 synonyms = model.findSynonyms('china', 40)
 
 for word, cosine_distance in synonyms:
-    print "{}: {}".format(word, cosine_distance)
+    print("{}: {}".format(word, cosine_distance))
 {% endhighlight %}
 </div>
 </div>

diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
@@ -95,9 +95,9 @@ mat = ... # an RDD of Vectors
 
 # Compute column summary statistics.
 summary = Statistics.colStats(mat)
-print summary.mean()
-print summary.variance()
-print summary.numNonzeros()
+print(summary.mean())
+print(summary.variance())
+print(summary.numNonzeros())
 
 {% endhighlight %}
 </div>
@@ -183,12 +183,12 @@ seriesY = ... # must have the same number of partitions and cardinality as serie
 
 # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a 
 # method is not specified, Pearson's method will be used by default. 
-print Statistics.corr(seriesX, seriesY, method="pearson")
+print(Statistics.corr(seriesX, seriesY, method="pearson"))
 
 data = ... # an RDD of Vectors
 # calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
 # If a method is not specified, Pearson's method will be used by default. 
-print Statistics.corr(data, method="pearson")
+print(Statistics.corr(data, method="pearson"))
 
 {% endhighlight %}
 </div>
@@ -398,14 +398,14 @@ vec = Vectors.dense(...) # a vector composed of the frequencies of events
 # compute the goodness of fit. If a second vector to test against is not supplied as a parameter,
 # the test runs against a uniform distribution.
 goodnessOfFitTestResult = Statistics.chiSqTest(vec)
-print goodnessOfFitTestResult # summary of the test including the p-value, degrees of freedom,
-                              # test statistic, the method used, and the null hypothesis.
+print(goodnessOfFitTestResult) # summary of the test including the p-value, degrees of freedom,
+                               # test statistic, the method used, and the null hypothesis.
 
 mat = Matrices.dense(...) # a contingency matrix
 
 # conduct Pearson's independence test on the input contingency matrix
 independenceTestResult = Statistics.chiSqTest(mat)
-print independenceTestResult  # summary of the test including the p-value, degrees of freedom...
+print(independenceTestResult)  # summary of the test including the p-value, degrees of freedom...
 
 obs = sc.parallelize(...)  # LabeledPoint(feature, label) .
 
@@ -415,8 +415,8 @@ obs = sc.parallelize(...)  # LabeledPoint(feature, label) .
 featureTestResults = Statistics.chiSqTest(obs)
 
 for i, result in enumerate(featureTestResults):
-    print "Column $d:" % (i + 1)
-    print result
+    print("Column $d:" % (i + 1))
+    print(result)
 {% endhighlight %}
 </div>
 

diff --git a/docs/quick-start.md b/docs/quick-start.md
@@ -406,7 +406,7 @@ logData = sc.textFile(logFile).cache()
 numAs = logData.filter(lambda s: 'a' in s).count()
 numBs = logData.filter(lambda s: 'b' in s).count()
 
-print "Lines with a: %i, lines with b: %i" % (numAs, numBs)
+print("Lines with a: %i, lines with b: %i" % (numAs, numBs))
 {% endhighlight %}
 
 

diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
@@ -570,7 +570,7 @@ teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 1
 # The results of SQL queries are RDDs and support all the normal RDD operations.
 teenNames = teenagers.map(lambda p: "Name: " + p.name)
 for teenName in teenNames.collect():
-  print teenName
+  print(teenName)
 {% endhighlight %}
 
 </div>
@@ -752,7 +752,7 @@ results = sqlContext.sql("SELECT name FROM people")
 # The results of SQL queries are RDDs and support all the normal RDD operations.
 names = results.map(lambda p: "Name: " + p.name)
 for name in names.collect():
-  print name
+  print(name)
 {% endhighlight %}
 
 </div>
@@ -1006,7 +1006,7 @@ parquetFile.registerTempTable("parquetFile");
 teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
 teenNames = teenagers.map(lambda p: "Name: " + p.name)
 for teenName in teenNames.collect():
-  print teenName
+  print(teenName)
 {% endhighlight %}
 
 </div>

diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
@@ -1525,7 +1525,7 @@ def getSqlContextInstance(sparkContext):
 words = ... # DStream of strings
 
 def process(time, rdd):
-    print "========= %s =========" % str(time)
+    print("========= %s =========" % str(time))
     try:
         # Get the singleton instance of SQLContext
         sqlContext = getSqlContextInstance(rdd.context)