From e79e9da0a9a65def3e73f546a89e38d0ac3cb1a8 Mon Sep 17 00:00:00 2001
From: Sebastian Fellner <sebastian.fellner@student.tugraz.at>
Date: Thu, 20 Jan 2022 12:31:45 +0100
Subject: [PATCH] Small comment and docstring tweaks

---
 average.py   | 6 ++++--
 wordcount.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/average.py b/average.py
index 6da298a..fcbd1f0 100755
--- a/average.py
+++ b/average.py
@@ -9,17 +9,18 @@ Example: Calculates running averages of raw numerical data.
 """
 def main():
     sc = SparkContext(MASTER, "spark-dsp-wordcount")
-    # Use "batches" of 2 seconds each
+    # Use batches of 2 seconds each
     ssc = StreamingContext(sc, 2)
     # This directory will be used to store checkpoints
     # Allows recovery from errors without data loss
     ssc.checkpoint("checkpoint/average")
 
+    # Create a stream based on raw data
     lines = ssc.textFileStream("numeric-data")
     results = (lines.map(lambda line: float(line))
         # Filtering operation: Only keep non-negative values
         .filter(lambda x: x >= 0)
-        # Enhancement operation: Pair each value with "1" (necessary by aggregation below)
+        # Enhancement operation: Pair each value with "1" (necessary for aggregation below)
         .map(lambda x: (x, 1))
         # Aggregation operation: Calculate sums and counts per window
         # Uses a sliding window, i.e., values can be part of multiple windows
@@ -29,6 +30,7 @@ def main():
         # Calculate the average per sliding window
         .map(lambda aggr: aggr[0] / aggr[1])
     )
+    # Continuously print the resulting stream
     results.pprint()
 
     ssc.start()
diff --git a/wordcount.py b/wordcount.py
index aaa899a..ce105bb 100755
--- a/wordcount.py
+++ b/wordcount.py
@@ -5,7 +5,7 @@ from pyspark.streaming import StreamingContext
 MASTER = "local[*]"
 
 """
-Example: Counts the number of word occurrences of a text input stream.
+Example: Counts the number of occurrences of each word in a text input stream.
 """
 def main():
     sc = SparkContext(MASTER, "spark-dsp-wordcount")
-- 
GitLab