1. 程式人生 > 實用技巧 >IDEA建立Spark開發環境

IDEA建立Spark開發環境

  1. 下面的六個步驟和通過Maven建立Java專案的方式相同
    1. 如果剛安裝完IDEA,直接點選[Create New Project]
    2. 左側選擇Maven,然後先勾選上部的[Create from archetype],然後右側選擇[maven-archetype-quickstart]
    3. 填寫GroupID、ArtifactId,點選Next
    4. 配置一下maven的位置以及配置檔案和本地倉庫位置
    5. 配置專案儲存的路徑,接著點選Next即可
    6. 進入專案後在右下角選擇[Enable Auto-Import],這樣在修改配置檔案後,依賴就會自動匯入
  2. 匯入 scala SDK
  3. 選擇匯入的scala SDK的地址
  4. 構建自己需要的專案結構(便於管理)
  5. 將對應的目錄結構改成對應的原始碼包
  6. 在pom.xml中匯入Spark開發所需的依賴
    <!--
      Maven POM fragment for a Spark 2.2.0 / Scala 2.11 project.
      Paste these three sections inside the <project> element of pom.xml:
        * repositories : remote repositories used to resolve artifacts
        * properties   : build settings and shared version numbers
        * dependencies : Spark, Hadoop, HBase and supporting libraries
    -->
    <repositories>
        <!-- Declared with the id "central", which overrides Maven's built-in central repository. -->
        <repository>
            <id>central</id>
            <name>aliyun maven</name>
            <!-- NOTE(review): plain http; Maven 3.8.1+ blocks http repositories by default. Confirm an https endpoint. -->
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
            <layout>default</layout>
            <!-- Whether release artifacts may be downloaded from this repository -->
            <releases>
                <enabled>true</enabled>
            </releases>
            <!-- Whether snapshot artifacts may be downloaded from this repository -->
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
        <!-- Hosts the "-cdh" builds referenced by hadoop.version and hbase.version below. -->
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <repository>
            <id>jboss</id>
            <!-- NOTE(review): plain http; see the note on the first repository. -->
            <url>http://repository.jboss.com/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Spark 2.2.0 removed Java 7 support, so compile for Java 8. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <hadoop.version>2.6.0-cdh5.7.6</hadoop.version>
        <spark.version>2.2.0</spark.version>
        <!-- NOTE(review): property name is spelled "mysq" (sic); kept unchanged because it is referenced by that name. -->
        <mysq.version>5.1.27</mysq.version>
        <hbase.version>1.2.0-cdh5.7.6</hbase.version>
        <uasparser.version>0.6.1</uasparser.version>
    </properties>

    <dependencies>
        <!-- Spark modules: all share spark.version; the _2.11 suffix is the Scala binary version. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <!-- Hadoop client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysq.version}</version>
        </dependency>

        <!-- HBase modules: all share hbase.version. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-hadoop2-compat</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>cz.mallat.uasparser</groupId>
            <artifactId>uasparser</artifactId>
            <version>${uasparser.version}</version>
        </dependency>

        <!-- Spark MLlib dependencies -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.scalanlp</groupId>
            <artifactId>breeze_2.11</artifactId>
            <version>0.13.1</version>
        </dependency>
        <!-- Disabled in the original:
        <dependency>
            <groupId>com.github.fommil.netlib</groupId>
            <artifactId>all</artifactId>
            <version>1.1.2</version>
        </dependency>
        -->
        <dependency>
            <groupId>org.jblas</groupId>
            <artifactId>jblas</artifactId>
            <version>1.2.3</version>
        </dependency>

        <!-- Connectors and utilities -->
        <dependency>
            <groupId>org.mongodb.spark</groupId>
            <artifactId>mongo-spark-connector_2.11</artifactId>
            <version>2.3.1</version>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.8.2.1</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.31</version>
        </dependency>
    </dependencies>