docs: fix variable formatting for QandA nb (#2033)

2023-07-24 13:58:51 -07:00 · 2023-07-24 13:58:51 -07:00 · 8be8fe3e61
--- a/Services/Quickstart
+++ b/Services/Quickstart
@ -48,7 +48,7 @@
    "\n",
    "We’ll cover the following key steps:\n",
    "\n",
-    "1. Preprocessing PDF Documents: Learn how to load the PDF documents into a Spark DataFrame, read the documents using the [Form Recognizer Service](https://azure.microsoft.com/en-us/products/form-recognizer/) in Azure AI Services, and use SynapseML to split the documents into chunks.\n",
+    "1. Preprocessing PDF Documents: Learn how to load the PDF documents into a Spark DataFrame, read the documents using the [Azure AI Document Intelligence](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence) in Azure AI Services, and use SynapseML to split the documents into chunks.\n",
    "2. Embedding Generation and Storage: Learn how to generate embeddings for the chunks using SynapseML and [Azure OpenAI Services](https://azure.microsoft.com/en-us/products/cognitive-services/openai-service), store the embeddings in a vector store using [Azure Cognitive Search](https://azure.microsoft.com/en-us/products/search), and search the vector store to answer the user’s question.\n",
    "3. Question Answering Pipeline: Learn how to retrieve relevant document based on the user’s question and provide the answer using [Langchain](https://python.langchain.com/en/latest/index.html#)."
   ]
@ -298,7 +298,7 @@
    }
   },
   "source": [
-    "### Step 3: Read the documents using Azure AI Services Form Recognizer."
+    "### Step 3: Read the documents using Azure AI Document Intelligence."
   ]
  },
  {
@ -335,7 +335,7 @@
    "from synapse.ml.cognitive import AnalyzeDocument\n",
    "from pyspark.sql.functions import col\n",
    "\n",
-    "analyzeDocument = (\n",
+    "analyze_document = (\n",
    "    AnalyzeDocument()\n",
    "    .setPrebuiltModelId(\"prebuilt-layout\")\n",
    "    .setSubscriptionKey(ai_services_key)\n",
@ -348,7 +348,7 @@
    ")\n",
    "\n",
    "analyzed_df = (\n",
-    "    analyzeDocument.transform(df)\n",
+    "    analyze_document.transform(df)\n",
    "    .withColumn(\"output_content\", col(\"result.analyzeResult.content\"))\n",
    "    .withColumn(\"paragraphs\", col(\"result.analyzeResult.paragraphs\"))\n",
    ").cache()"
@ -701,7 +701,7 @@
    "\n",
    "# Define a UDF using the @udf decorator\n",
    "@udf(returnType=StringType())\n",
-    "def insertToCogSearch(idx, content, contentVector):\n",
+    "def insert_to_cog_search(idx, content, contentVector):\n",
    "    url = f\"https://{cogsearch_name}.search.windows.net/indexes/{cogsearch_index_name}/docs/index?api-version=2023-07-01-Preview\"\n",
    "\n",
    "    payload = json.dumps(\n",
@ -762,7 +762,7 @@
    ")  ## adding a column with id\n",
    "df_embeddings = df_embeddings.withColumn(\n",
    "    \"errorCogSearch\",\n",
-    "    insertToCogSearch(\n",
+    "    insert_to_cog_search(\n",
    "        df_embeddings[\"idx\"], df_embeddings[\"chunk\"], df_embeddings[\"embeddings\"]\n",
    "    ),\n",
    ")\n",
@ -791,7 +791,7 @@
    }
   },
   "source": [
-    "### Step 7: Ask a Question"
+    "### Step 7: Ask a Question."
   ]
  },
  {
@ -823,7 +823,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "userQuestion = \"What did the astronaut Edgar Mitchell call Earth?\"\n",
+    "user_question = \"What did the astronaut Edgar Mitchell call Earth?\"\n",
    "retrieve_k = 2  # Retrieve the top 2 documents from vector database"
   ]
  },
@ -836,11 +836,11 @@
    "# Ask a question and convert to embeddings\n",
    "\n",
    "\n",
-    "def genQuestionEmbedding(userQuestion):\n",
+    "def gen_question_embedding(user_question):\n",
    "    # Convert question to embedding using synapseML\n",
    "    from synapse.ml.cognitive import OpenAIEmbedding\n",
    "\n",
-    "    df_ques = spark.createDataFrame([(userQuestion, 1)], [\"questions\", \"dummy\"])\n",
+    "    df_ques = spark.createDataFrame([(user_question, 1)], [\"questions\", \"dummy\"])\n",
    "    embedding = (\n",
    "        OpenAIEmbedding()\n",
    "        .setSubscriptionKey(aoai_key)\n",
@ -852,16 +852,16 @@
    "    )\n",
    "    df_ques_embeddings = embedding.transform(df_ques)\n",
    "    row = df_ques_embeddings.collect()[0]\n",
-    "    questionEmbedding = row.embeddings.tolist()\n",
-    "    return questionEmbedding\n",
+    "    question_embedding = row.embeddings.tolist()\n",
+    "    return question_embedding\n",
    "\n",
    "\n",
-    "def retrieve_k_chunk(k, questionEmbedding):\n",
+    "def retrieve_k_chunk(k, question_embedding):\n",
    "    # Retrieve the top K entries\n",
    "    url = f\"https://{cogsearch_name}.search.windows.net/indexes/{cogsearch_index_name}/docs/search?api-version=2023-07-01-Preview\"\n",
    "\n",
    "    payload = json.dumps(\n",
-    "        {\"vector\": {\"value\": questionEmbedding, \"fields\": \"contentVector\", \"k\": 2}}\n",
+    "        {\"vector\": {\"value\": question_embedding, \"fields\": \"contentVector\", \"k\": 2}}\n",
    "    )\n",
    "    headers = {\n",
    "        \"Content-Type\": \"application/json\",\n",
@ -875,8 +875,8 @@
    "\n",
    "\n",
    "# Generate embeddings for the question and retrieve the top k document chunks\n",
-    "questionEmbedding = genQuestionEmbedding(userQuestion)\n",
-    "output = retrieve_k_chunk(retrieve_k, questionEmbedding)"
+    "question_embedding = gen_question_embedding(user_question)\n",
+    "output = retrieve_k_chunk(retrieve_k, question_embedding)"
   ]
  },
  {
@ -899,7 +899,7 @@
    }
   },
   "source": [
-    "### Step 8: Respond to a User’s Question"
+    "### Step 8: Respond to a User’s Question."
   ]
  },
  {
@ -968,7 +968,7 @@
   "outputs": [],
   "source": [
    "# Define a Question Answering chain function using LangChain\n",
-    "def QA_chain_func():\n",
+    "def qa_chain_func():\n",
    "\n",
    "    # Define llm model\n",
    "    llm = AzureOpenAI(\n",
@ -999,8 +999,8 @@
    "context = [i[\"content\"] for i in output[\"value\"]]\n",
    "\n",
    "# Make a Quesion Answer chain function and pass\n",
-    "qa_chain = QA_chain_func()\n",
-    "answer = qa_chain.run({\"context\": context, \"query\": userQuestion})\n",
+    "qa_chain = qa_chain_func()\n",
+    "answer = qa_chain.run({\"context\": context, \"query\": user_question})\n",
    "\n",
    "print(answer)"
   ]