**docs/deploy-worker-udf-binaries.md** (9 additions, 10 deletions)
```diff
@@ -16,7 +16,7 @@ When deploying workers and writing UDFs, there are a few commonly used environme
   </tr>
   <tr>
     <td><b>DOTNET_WORKER_DIR</b></td>
-    <td>Path where the <code>Microsoft.Spark.Worker</code> binary has been generated.</br>It's used by the Spark driver and will be passed to Spark executors. If this variable is not set up, the Spark executors will search the path specified in the <code>PATH</code> environment variable.</br><i>e.g. "C:\bin\Microsoft.Spark.Worker-0.6.0"</i></td>
+    <td>Path where the <code>Microsoft.Spark.Worker</code> binary has been generated.</br>It's used by the Spark driver and will be passed to Spark executors. If this variable is not set up, the Spark executors will search the path specified in the <code>PATH</code> environment variable.</br><i>e.g. "C:\bin\Microsoft.Spark.Worker"</i></td>
   </tr>
   <tr>
     <td><b>DOTNET_ASSEMBLY_SEARCH_PATHS</b></td>
```
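As a hedged illustration of the `DOTNET_WORKER_DIR` setting described in the table above (the install path below is a placeholder, assuming the worker binaries were extracted under `$HOME/bin`):

```shell
# Hypothetical install location -- point this at wherever the
# Microsoft.Spark.Worker binaries were actually extracted.
export DOTNET_WORKER_DIR="$HOME/bin/Microsoft.Spark.Worker"

# The Spark driver reads this variable and forwards it to executors;
# confirm it is visible in the current shell before running spark-submit.
echo "DOTNET_WORKER_DIR=$DOTNET_WORKER_DIR"
```

On Windows the equivalent would be a `setx`/System Properties setting pointing at the `C:\bin\...` path shown in the table.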
```diff
@@ -50,7 +50,7 @@ Once the Spark application is [bundled](https://spark.apache.org/docs/latest/sub
   </tr>
   <tr>
     <td><b>--conf</b></td>
-    <td>Arbitrary Spark configuration property in <code>key=value</code> format.</br><i>e.g. spark.yarn.appMasterEnv.DOTNET_WORKER_DIR=.\worker\Microsoft.Spark.Worker-0.6.0</i></td>
+    <td>Arbitrary Spark configuration property in <code>key=value</code> format.</br><i>e.g. spark.yarn.appMasterEnv.DOTNET_WORKER_DIR=.\worker\Microsoft.Spark.Worker</i></td>
   </tr>
   <tr>
     <td><b>--files</b></td>
```
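On YARN, the same worker path is forwarded to the application master through `--conf`, as the row above shows. A sketch of such a submit command (master, jar name, and paths are placeholders, not the project's canonical values); the command is built as a string here so it can be inspected without a Spark installation:

```shell
# Hypothetical spark-submit invocation; run it only on a machine where
# spark-submit is on the PATH and the worker archive has been shipped.
SUBMIT_CMD="spark-submit \
  --master yarn \
  --conf spark.yarn.appMasterEnv.DOTNET_WORKER_DIR=./worker/Microsoft.Spark.Worker \
  app.jar"

echo "$SUBMIT_CMD"
```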
```diff
@@ -68,12 +68,12 @@ Once the Spark application is [bundled](https://spark.apache.org/docs/latest/sub
   <li>Please note that this option is only applicable for yarn mode.</li>
   <li>It supports specifying file names with # similar to Hadoop.</br>
   </ul>
-  <i>e.g. <code>hdfs://<path to your worker file>/Microsoft.Spark.Worker.net461.win-x64-0.6.0.zip#worker</code>. This will copy and extract the zip file to <code>worker</code> folder.</i></li></td>
+  <i>e.g. <code>hdfs://<path to your worker file>/Microsoft.Spark.Worker.zip#worker</code>. This will copy and extract the zip file to <code>worker</code> folder.</i></li></td>
   </tr>
   <tr>
   <td><b>application-jar</b></td>
   <td>Path to a bundled jar including your application and all dependencies.</br>
-  <i>e.g. hdfs://<path to your jar>/microsoft-spark-2.4.x-0.6.0.jar</i></td>
+  <i>e.g. hdfs://<path to your jar>/microsoft-spark-<version>.jar</i></td>
   </tr>
   <tr>
   <td><b>application-arguments</b></td>
```
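The `#` suffix on a `--files` entry, described in the hunk above, names the localized copy on the YARN containers, which is what makes the worker land in a predictable `worker` folder. A small sketch of that naming convention (the HDFS path is a placeholder):

```shell
# Hypothetical HDFS location of the worker archive; the text after '#'
# is the alias under which YARN localizes the file in each container.
WORKER_ARCHIVE="hdfs:///apps/spark/Microsoft.Spark.Worker.zip#worker"

# Split off the alias (the part after the last '#').
ALIAS="${WORKER_ARCHIVE##*#}"
echo "$ALIAS"
```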
```diff
@@ -95,10 +95,9 @@ Once the Spark application is [bundled](https://spark.apache.org/docs/latest/sub
 **Answer:** Please try restarting your PowerShell window (or other command windows) first so that it can take the latest environment variable values. Then start your program.
 
 #### 3. Question: After submitting my Spark application, I get the error `System.TypeLoadException: Could not load type 'System.Runtime.Remoting.Contexts.Context'`.
-> **Command:** %SPARK_HOME%\bin\spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --master local microsoft-spark-2.4.x-0.6.0.jar mySparkApp.exe</br>
-**Error:**[][][Error][TaskRunner][0] ProcessStream() failed with exception: System.TypeLoadException: Could not load type 'System.Runtime.Remoting.Contexts.Context' from assembly 'mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=...'.
+> **Error:**[][][Error][TaskRunner][0] ProcessStream() failed with exception: System.TypeLoadException: Could not load type 'System.Runtime.Remoting.Contexts.Context' from assembly 'mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=...'.
 
-**Answer:** Please check the `Microsoft.Spark.Worker` version you are using. We currently provide two versions: **.NET Framework 4.6.1** and **.NET Core 2.1.x**. In this case, `Microsoft.Spark.Worker.net461.win-x64-0.6.0` (which you can download [here](https://github.com/dotnet/spark/releases)) should be used since `System.Runtime.Remoting.Contexts.Context` is only for .NET Framework.
+**Answer:** Please check the `Microsoft.Spark.Worker` version you are using. We currently provide two versions: **.NET Framework 4.6.1** and **.NET Core 2.1.x**. In this case, `Microsoft.Spark.Worker.net461.win-x64-<version>` (which you can download [here](https://github.com/dotnet/spark/releases)) should be used since `System.Runtime.Remoting.Contexts.Context` is only for .NET Framework.
 
 #### 4. Question: How to run my spark application with UDFs on YARN? Which environment variables and parameters should I use?
```
```diff
 * The following APIs have been removed due to the thread-local variable dependency (see [#332](https://github.com/dotnet/spark/pull/332) and [#333](https://github.com/dotnet/spark/issues/333) for more detail):
+  * `SparkSession.ClearActiveSession()`
+  * `SparkSession.GetActiveSession()`
+  * `SparkSession.SetActiveSession()`
+
+### Supported Spark Versions
+
+The following table outlines the supported Spark versions along with the microsoft-spark JAR to use with:
```