Updating HDInsight lab

2017-05-02 20:28:55 -07:00 · 2017-05-02 20:28:55 -07:00 · dc7af7adec
--- a/HDInsight/DataScienceLab/hands-on-lab.md
+++ b/HDInsight/DataScienceLab/hands-on-lab.md
@ -28,7 +28,7 @@ Learn the basics of data science using Spark
 This notebook demonstrates how to use MLlib, Spark's built-in machine
 learning library, to perform a simple prediction on an open dataset.

-**Launch Jupyter Notebooks **
+### Launch Jupyter Notebook
 Navigate to the following link, replacing the placeholder with your cluster name, and sign in with the username and password provided: https://<Fill_ME_IN>.azurehdinsight.net/jupyter/tree/PySpark

 -   Username: &lt;FILL\_ME\_IN&gt;
@ -61,7 +61,7 @@ Read the dataset from a csv file stored in Azure Blob Storage.

 ```python
 inspections =
-spark.read.csv('wasb:///HdiSamples/HdiSamples/FoodInspectionData/FoodInspections1.csv',
+spark.read.csv('wasb:///HdiSamples/HdiSamples/FoodInspectionData/Food_Inspections1.csv',
 inferSchema=True)
 ```
 #### Inspect Schema
@ -86,12 +86,12 @@ df.select('results').distinct().show()

 import matplotlib.pyplot as plt

-labels = count\_results\_df\['results'\]
+labels = count_results_df['results']

-sizes = count\_results\_df\['cnt'\]
+sizes = count_results_df['cnt']

-colors = \['turquoise', 'seagreen', 'mediumslateblue', 'palegreen',
-'coral'\]
+colors = ['turquoise', 'seagreen', 'mediumslateblue', 'palegreen',
+'coral']

 plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)

@ -110,7 +110,7 @@ We can use the model we created earlier to predict what the results of
 new inspections will be, based on the violations that were observed.
 ```python
 testData =
-selectInterestingColumns(spark.read.csv('wasb:///HdiSamples/HdiSamples/FoodInspectionData/Food\_Inspections2.csv',
+selectInterestingColumns(spark.read.csv('wasb:///HdiSamples/HdiSamples/FoodInspectionData/Food_Inspections2.csv',
 inferSchema=True))

 testDf = testData.where("results = 'Fail' OR results = 'Pass' OR results
@ -137,7 +137,7 @@ print("There were %d inspections and there were %d successful
 predictions" % (numInspections, numSuccesses))

 print("This is a %d%% success rate" % (float(numSuccesses) /
-float(numInspections) \* 100))
+float(numInspections) * 100))
 ```
 #### Final visualization to help us reason about the results of this test.

@ -302,7 +302,7 @@ print("There were %d User sessions and there were %d successful
 predictions" % (numInspections, numSuccesses))

 print("This is a %d%% success rate" % (float(numSuccesses) /
-float(numInspections) \* 100))
+float(numInspections) * 100))
 ```
 #### Final visualization to help us reason about the results of this test.

--- a/HDInsight/HiveLab/hands-on-lab.md
+++ b/HDInsight/HiveLab/hands-on-lab.md
@ -83,8 +83,7 @@ PaymentAmount varchar(50)

 STORED AS TEXTFILE LOCATION 'wasb:///hadooplabs/Lab1/weblogs/';

-LOAD DATA INPATH 'wasb:///hadooplabs/Lab1/weblogs.csv' INTO TABLE
-HDILABDB.weblogs;
+LOAD DATA INPATH 'wasb:///hadooplabs/Lab1/weblogs.csv' INTO TABLE HDILABDB.weblogs;
 ```
 -   Click Execute to run the query. Once the query completes, the Query
    Process Results status will change to **SUCCEEDED**.
@ -202,14 +201,11 @@ month. The output should look like this.

 DROP TABLE IF EXISTS HDILABDB.SalesbyCategory;

-CREATE TABLE HDILABDB.SalesbyCategory ROW FORMAT DELIMITED
-
-FIELDS TERMINATED by '\1' lines TERMINATED by '\n'
+CREATE TABLE HDILABDB.SalesbyCategory ROW FORMAT DELIMITED FIELDS TERMINATED by '\1' lines TERMINATED by '\n'

 STORED AS TEXTFILE LOCATION 'wasb:///hadooplabs/Lab1/SalesbyCategory'

 AS
-
 Select

 categoryname,
@ -218,12 +214,8 @@ Sum(Quantity) As quantitysold,

 Sum(PaymentAmount) As totalamount

-FROM HDILABDB.weblogs
-
-WHERE PurchaseType="Purchased"
-
+FROM HDILABDB.weblogs WHERE PurchaseType="Purchased"
 GROUP BY CategoryName
-
 ORDER BY QuantitySold Desc;

 Select * from HDILABDB.SalesbyCategory LIMIT 10
@ -250,10 +242,7 @@ sold per book. The output should look like this.
 -- Top Selling Books

 DROP TABLE IF EXISTS HDILABDB.SalesbyBooks;
-
-CREATE TABLE HDILABDB.SalesbyBooks ROW FORMAT DELIMITED FIELDS
-
-TERMINATED by '\1' lines TERMINATED by '\n'
+CREATE TABLE HDILABDB.SalesbyBooks ROW FORMAT DELIMITED FIELDS TERMINATED by '\1' lines TERMINATED by '\n'

 STORED AS TEXTFILE LOCATION 'wasb:///hadooplabs/Lab1/SalesbyBooks'