Skip to content

Commit

Permalink
[SPARK-9490] [DOCS] [MLLIB] MLlib evaluation metrics guide example python code uses deprecated print statement
Browse files Browse the repository at this point in the history

Use print(x) not print x for Python 3 in eval examples
CC sethah mengxr -- just wanted to close this out before 1.5

Author: Sean Owen <[email protected]>

Closes apache#7822 from srowen/SPARK-9490 and squashes the following commits:

01abeba [Sean Owen] Change "print x" to "print(x)" in the rest of the docs too
bd7f7fb [Sean Owen] Use print(x) not print x for Python 3 in eval examples
  • Loading branch information
srowen authored and mengxr committed Jul 31, 2015
1 parent 815c824 commit 873ab0f
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 50 deletions.
2 changes: 1 addition & 1 deletion docs/ml-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ test = sc.parallelize([(4L, "spark i j k"),
prediction = model.transform(test)
selected = prediction.select("id", "text", "prediction")
for row in selected.collect():
print row
print(row)

sc.stop()
{% endhighlight %}
Expand Down
66 changes: 33 additions & 33 deletions docs/mllib-evaluation-metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,10 @@ predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp
metrics = BinaryClassificationMetrics(predictionAndLabels)

# Area under precision-recall curve
print "Area under PR = %s" % metrics.areaUnderPR
print("Area under PR = %s" % metrics.areaUnderPR)

# Area under ROC curve
print "Area under ROC = %s" % metrics.areaUnderROC
print("Area under ROC = %s" % metrics.areaUnderROC)

{% endhighlight %}

Expand Down Expand Up @@ -606,24 +606,24 @@ metrics = MulticlassMetrics(predictionAndLabels)
precision = metrics.precision()
recall = metrics.recall()
f1Score = metrics.fMeasure()
print "Summary Stats"
print "Precision = %s" % precision
print "Recall = %s" % recall
print "F1 Score = %s" % f1Score
print("Summary Stats")
print("Precision = %s" % precision)
print("Recall = %s" % recall)
print("F1 Score = %s" % f1Score)

# Statistics by class
labels = data.map(lambda lp: lp.label).distinct().collect()
for label in sorted(labels):
print "Class %s precision = %s" % (label, metrics.precision(label))
print "Class %s recall = %s" % (label, metrics.recall(label))
print "Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0))
print("Class %s precision = %s" % (label, metrics.precision(label)))
print("Class %s recall = %s" % (label, metrics.recall(label)))
print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))

# Weighted stats
print "Weighted recall = %s" % metrics.weightedRecall
print "Weighted precision = %s" % metrics.weightedPrecision
print "Weighted F(1) Score = %s" % metrics.weightedFMeasure()
print "Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5)
print "Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate
print("Weighted recall = %s" % metrics.weightedRecall)
print("Weighted precision = %s" % metrics.weightedPrecision)
print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
{% endhighlight %}

</div>
Expand Down Expand Up @@ -881,28 +881,28 @@ scoreAndLabels = sc.parallelize([
metrics = MultilabelMetrics(scoreAndLabels)

# Summary stats
print "Recall = %s" % metrics.recall()
print "Precision = %s" % metrics.precision()
print "F1 measure = %s" % metrics.f1Measure()
print "Accuracy = %s" % metrics.accuracy
print("Recall = %s" % metrics.recall())
print("Precision = %s" % metrics.precision())
print("F1 measure = %s" % metrics.f1Measure())
print("Accuracy = %s" % metrics.accuracy)

# Individual label stats
labels = scoreAndLabels.flatMap(lambda x: x[1]).distinct().collect()
for label in labels:
print "Class %s precision = %s" % (label, metrics.precision(label))
print "Class %s recall = %s" % (label, metrics.recall(label))
print "Class %s F1 Measure = %s" % (label, metrics.f1Measure(label))
print("Class %s precision = %s" % (label, metrics.precision(label)))
print("Class %s recall = %s" % (label, metrics.recall(label)))
print("Class %s F1 Measure = %s" % (label, metrics.f1Measure(label)))

# Micro stats
print "Micro precision = %s" % metrics.microPrecision
print "Micro recall = %s" % metrics.microRecall
print "Micro F1 measure = %s" % metrics.microF1Measure
print("Micro precision = %s" % metrics.microPrecision)
print("Micro recall = %s" % metrics.microRecall)
print("Micro F1 measure = %s" % metrics.microF1Measure)

# Hamming loss
print "Hamming loss = %s" % metrics.hammingLoss
print("Hamming loss = %s" % metrics.hammingLoss)

# Subset accuracy
print "Subset accuracy = %s" % metrics.subsetAccuracy
print("Subset accuracy = %s" % metrics.subsetAccuracy)

{% endhighlight %}

Expand Down Expand Up @@ -1283,10 +1283,10 @@ scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])
metrics = RegressionMetrics(scoreAndLabels)

# Root mean squared error
print "RMSE = %s" % metrics.rootMeanSquaredError
print("RMSE = %s" % metrics.rootMeanSquaredError)

# R-squared
print "R-squared = %s" % metrics.r2
print("R-squared = %s" % metrics.r2)

{% endhighlight %}

Expand Down Expand Up @@ -1479,17 +1479,17 @@ valuesAndPreds = parsedData.map(lambda p: (float(model.predict(p.features)), p.l
metrics = RegressionMetrics(valuesAndPreds)

# Squared Error
print "MSE = %s" % metrics.meanSquaredError
print "RMSE = %s" % metrics.rootMeanSquaredError
print("MSE = %s" % metrics.meanSquaredError)
print("RMSE = %s" % metrics.rootMeanSquaredError)

# R-squared
print "R-squared = %s" % metrics.r2
print("R-squared = %s" % metrics.r2)

# Mean absolute error
print "MAE = %s" % metrics.meanAbsoluteError
print("MAE = %s" % metrics.meanAbsoluteError)

# Explained variance
print "Explained variance = %s" % metrics.explainedVariance
print("Explained variance = %s" % metrics.explainedVariance)

{% endhighlight %}

Expand Down
2 changes: 1 addition & 1 deletion docs/mllib-feature-extraction.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ model = word2vec.fit(inp)
synonyms = model.findSynonyms('china', 40)

for word, cosine_distance in synonyms:
print "{}: {}".format(word, cosine_distance)
print("{}: {}".format(word, cosine_distance))
{% endhighlight %}
</div>
</div>
Expand Down
20 changes: 10 additions & 10 deletions docs/mllib-statistics.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ mat = ... # an RDD of Vectors

# Compute column summary statistics.
summary = Statistics.colStats(mat)
print summary.mean()
print summary.variance()
print summary.numNonzeros()
print(summary.mean())
print(summary.variance())
print(summary.numNonzeros())

{% endhighlight %}
</div>
Expand Down Expand Up @@ -183,12 +183,12 @@ seriesY = ... # must have the same number of partitions and cardinality as serie

# Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
# method is not specified, Pearson's method will be used by default.
print Statistics.corr(seriesX, seriesY, method="pearson")
print(Statistics.corr(seriesX, seriesY, method="pearson"))

data = ... # an RDD of Vectors
# calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
# If a method is not specified, Pearson's method will be used by default.
print Statistics.corr(data, method="pearson")
print(Statistics.corr(data, method="pearson"))

{% endhighlight %}
</div>
Expand Down Expand Up @@ -398,14 +398,14 @@ vec = Vectors.dense(...) # a vector composed of the frequencies of events
# compute the goodness of fit. If a second vector to test against is not supplied as a parameter,
# the test runs against a uniform distribution.
goodnessOfFitTestResult = Statistics.chiSqTest(vec)
print goodnessOfFitTestResult # summary of the test including the p-value, degrees of freedom,
# test statistic, the method used, and the null hypothesis.
print(goodnessOfFitTestResult) # summary of the test including the p-value, degrees of freedom,
# test statistic, the method used, and the null hypothesis.

mat = Matrices.dense(...) # a contingency matrix

# conduct Pearson's independence test on the input contingency matrix
independenceTestResult = Statistics.chiSqTest(mat)
print independenceTestResult # summary of the test including the p-value, degrees of freedom...
print(independenceTestResult) # summary of the test including the p-value, degrees of freedom...

obs = sc.parallelize(...) # LabeledPoint(feature, label) .

Expand All @@ -415,8 +415,8 @@ obs = sc.parallelize(...) # LabeledPoint(feature, label) .
featureTestResults = Statistics.chiSqTest(obs)

for i, result in enumerate(featureTestResults):
print "Column $d:" % (i + 1)
print result
print("Column %d:" % (i + 1))
print(result)
{% endhighlight %}
</div>

Expand Down
2 changes: 1 addition & 1 deletion docs/quick-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ logData = sc.textFile(logFile).cache()
numAs = logData.filter(lambda s: 'a' in s).count()
numBs = logData.filter(lambda s: 'b' in s).count()

print "Lines with a: %i, lines with b: %i" % (numAs, numBs)
print("Lines with a: %i, lines with b: %i" % (numAs, numBs))
{% endhighlight %}


Expand Down
6 changes: 3 additions & 3 deletions docs/sql-programming-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 1
# The results of SQL queries are RDDs and support all the normal RDD operations.
teenNames = teenagers.map(lambda p: "Name: " + p.name)
for teenName in teenNames.collect():
print teenName
print(teenName)
{% endhighlight %}

</div>
Expand Down Expand Up @@ -752,7 +752,7 @@ results = sqlContext.sql("SELECT name FROM people")
# The results of SQL queries are RDDs and support all the normal RDD operations.
names = results.map(lambda p: "Name: " + p.name)
for name in names.collect():
print name
print(name)
{% endhighlight %}

</div>
Expand Down Expand Up @@ -1006,7 +1006,7 @@ parquetFile.registerTempTable("parquetFile");
teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
teenNames = teenagers.map(lambda p: "Name: " + p.name)
for teenName in teenNames.collect():
print teenName
print(teenName)
{% endhighlight %}

</div>
Expand Down
2 changes: 1 addition & 1 deletion docs/streaming-programming-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -1525,7 +1525,7 @@ def getSqlContextInstance(sparkContext):
words = ... # DStream of strings

def process(time, rdd):
print "========= %s =========" % str(time)
print("========= %s =========" % str(time))
try:
# Get the singleton instance of SQLContext
sqlContext = getSqlContextInstance(rdd.context)
Expand Down

0 comments on commit 873ab0f

Please sign in to comment.