Study notes
Verify that HDFS is working:
hdfs dfs -ls /
echo "dog dog cat" > input.txt hdfs dfs -rm /user/input.txt hdfs dfs -put input.txt /user hdfs dfs -ls /user hadoop jar hadoop-mapreduce-examples-3.3.6.jar wordcount /user/input.txt /user/output
Run the bundled pi estimator (2 map tasks, 10 samples per map):

hadoop jar hadoop-mapreduce-examples-3.3.6.jar pi 2 10
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Main {
    public static void main(String[] args) throws Exception {
        // With no explicit configuration the job runs with the local job runner
        // against the local file system.
        Job job = Job.getInstance();
        job.setJobName("word count");
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/home/wangyang/workspace/hadoop/hadoop-demo/workDir/input.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/wangyang/workspace/hadoop/hadoop-demo/workDir/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
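The driver above references TokenizerMapper and IntSumReducer, which are not shown in these notes. A minimal sketch of the two classes, adapted from the standard Hadoop WordCount example and placed in the same package as Main (the class layout here is my assumption):

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Splits each input line into tokens and emits (word, 1) for every token.
class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}

// Sums the counts for each word and emits (word, total).
class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}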
After running Main locally, the working directory looks like this:

.
├── input.txt
└── output
    ├── part-r-00000
    └── _SUCCESS
The contents of part-r-00000 are as follows:
a	4
v	2
To read the input from and write the output to HDFS instead of the local file system, use fully qualified hdfs:// paths (imports are the same as above):

public class Main {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        job.setJobName("word count");
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Fully qualified HDFS paths; the job itself still uses the local
        // runner unless configured otherwise.
        FileInputFormat.addInputPath(job, new Path("hdfs://server:8020/user/input.txt"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://server:8020/user/output3"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
To submit the job to YARN on the remote cluster instead of running it locally, point the configuration at HDFS and YARN explicitly:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobSubmit {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // 1. Default file system the job accesses at runtime
        conf.set("fs.defaultFS", "hdfs://server:8020");
        // 2. Where the job is submitted: YARN instead of the local runner
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "server");

        Job job = Job.getInstance(conf);
        job.setJobName("word count");
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // With fs.defaultFS set, plain paths are resolved against HDFS:
        // FileInputFormat.addInputPath(job, new Path("hdfs://server:8020/user/input.txt"));
        // FileOutputFormat.setOutputPath(job, new Path("hdfs://server:8020/user/output5"));
        FileInputFormat.addInputPath(job, new Path("/user/input.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/user/output6"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
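One caveat the snippet above does not cover (this is an assumption on my part, not from the original notes): when submitting to YARN from an IDE, the mapper and reducer classes have to be shipped to the cluster as a jar, otherwise the cluster cannot find them. A minimal sketch, assuming the project has already been packaged into a hypothetical wordcount.jar, added before waitForCompletion:

// Assumption: the project was built (e.g. with mvn package) into this jar.
job.setJar("/home/wangyang/workspace/hadoop/hadoop-demo/target/wordcount.jar");
// Alternatively, when running the packaged jar via `hadoop jar`, this is enough:
// job.setJarByClass(JobSubmit.class);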
When using the HDFS file system, the error "Server IPC version 9 cannot communicate with client version 4" appears while the Java HDFS client is being initialized. The cause is a mismatch between the Maven dependency version and the HDFS version: the newest hadoop-core in the Maven repository is only 1.2.1, so that dependency must not be used. The local Hadoop I installed for testing is hadoop-3.3.6, so using the 3.3.6 versions of hadoop-common + hadoop-hdfs + hadoop-client fixes the problem.
Replace the following dependencies
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
        <version>1.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.3.4</version>
    </dependency>
</dependencies>
with:
<properties>
    <hadoop.version>3.3.6</hadoop.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
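With the matching client dependency in place, the HDFS client can be initialized from Java. A minimal sketch; the class name and the listed path are illustrative, and the NameNode address is the hdfs://server:8020 used in the examples above:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Connect to the NameNode from the examples above.
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://server:8020"), conf)) {
            // Roughly equivalent to: hdfs dfs -ls /user
            for (FileStatus status : fs.listStatus(new Path("/user"))) {
                System.out.println(status.getPath());
            }
        }
    }
}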