flink入门(wordcount)

Flink快速上手

1.在IDEA创建maven工程FlinkTutorial

2.在pom.xml中添加依赖和maven插件

 <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
     	<dependency>
            <groupId>org.apache.flink</groupId>
            <!-- 注意:Scala 二进制版本后缀必须与上面的 flink-scala_2.11 保持一致,否则会出现二进制不兼容 -->
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-streaming-java_2.11</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.44</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>1.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>1.10.2</version>
        </dependency>
 </dependencies>
<build>
    <plugins>
    <!-- 该插件用于将 Scala 代码编译成 class 文件 -->
    <plugin>
    <groupId>net.alchim31.maven</groupId>
    <artifactId>scala-maven-plugin</artifactId>
    <version>3.4.6</version>
    <executions>
        <execution>
        <!-- 声明绑定到 maven 的 compile 阶段 -->
        <goals>
            <goal>compile</goal>
        </goals>
         </execution>
    </executions>
    </plugin>
    <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-assembly-plugin</artifactId>
    <version>3.0.0</version>
    <configuration>
        <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
    </configuration>
    <executions>
        <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
                <goal>single</goal>
            </goals>
        </execution>
    </executions>
    </plugin>
    </plugins>
</build>

在项目添加scala支持

批处理wordcount

import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._

object WordCount01 {
  /**
   * Batch word count: reads a text file, splits each line into words,
   * and prints the count of each word.
   */
  def main(args: Array[String]): Unit = {
    // Create a batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Path is relative to the working directory (the project root when run from the IDE).
    val inputPath = "FlinkBasis/src/resources/hello.txt"
    // Read the file exactly once. (The original read it twice and referenced
    // an undefined `inputpath`, which did not compile.)
    val data: DataSet[String] = env.readTextFile(inputPath)

    val result = data
      .flatMap(_.split(" "))
      .filter(_.nonEmpty) // skip empty tokens produced by consecutive spaces
      .map((_, 1))        // pair each word with an initial count of 1
      .groupBy(0)         // group by the first field (the word)
      .sum(1)             // sum the second field (the count)

    result.print()
  }
}

image-20211010154306099

流处理wordcount

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala._

object StreamWordCount {
  /**
   * Streaming word count: consumes a socket text stream and maintains a
   * running count per word, printing updates as they arrive.
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // In the IDE the default parallelism is the number of CPU cores;
    // uncomment to override it explicitly.
    //    env.setParallelism(2)
    // The socket host and port could instead be taken from program arguments:
    //    val paramTool: ParameterTool = ParameterTool.fromArgs(args)
    //    val host: String = paramTool.get("host")
    //    val port: Int = paramTool.getInt("port")

    // Receive the socket text stream.
    val lines = env.socketTextStream("localhost", 7777)

    // Tokenize, drop empty tokens, and keep a running count per word.
    val wordCounts = lines
      .flatMap(_.split(" "))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    // Print with parallelism 1 so output lines are not prefixed with a subtask index.
    wordCounts.print().setParallelism(1)

    env.execute("stream word count")
  }
}

本地windows安装nc,cmd执行nc -l -p 7777或者在本地linux(微软商店的ubuntu)使用nc -lk 7777

发送数据

image-20211010154159026