# Load every retail CSV file into a single DataFrame.
# The chain is wrapped in parentheses instead of using trailing backslashes:
# a doubled backslash at end of line is not a valid line continuation.
df = (
    spark.read.format("csv")
    .option("header", "true")  # first row of each file holds the column names
    .option("inferSchema", "true")  # let Spark derive column types from the data
    .load("data/retail-data/all/*.csv")
    .coalesce(5)  # reduce the result to at most 5 partitions
)
# Mark the DataFrame for caching so the aggregations below reuse it.
df.cache()
# Expose the DataFrame to Spark SQL under the name "dfTable".
df.createOrReplaceTempView("dfTable")

# COMMAND ----------

from pyspark.sql.functions import count

# Build the aggregate expression first, then evaluate and print it.
stock_code_count = count("StockCode")
df.select(stock_code_count).show()  # 541909

# Aggregate functions

# count

from pyspark.sql.functions import count

# Aggregate the StockCode column into a one-row DataFrame and print it.
stock_code_totals = df.select(count("StockCode"))
stock_code_totals.show()  # 541909

# countDistinct

from pyspark.sql.functions import countDistinct

# Count the distinct StockCode values and print the one-row result.
distinct_stock_codes = countDistinct("StockCode")
df.select(distinct_stock_codes).show()  # 4070