Big Data Certificate - Hadoop and Spark

Hadoop

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount
{
	public static void main(String[] args) throws Exception
	{
		// args[0] = input directory, args[1] = output directory (must not exist yet).
		Job job = createJob(args[0], args[1]);
		// Submit the job, wait for it to finish and report success or failure.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

	public static Job createJob(String in, String out) throws IOException
	{
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "Word count v1.0");
		// Ship the jar containing this class to the cluster.
		job.setJarByClass(WordCount.class);
		// Map and reduce phases.
		job.setMapperClass(WordCountMapper.class);
		job.setReducerClass(WordCountReducer.class);
		// Types of the (key, value) pairs produced by the reducer.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		// Input and output locations on HDFS.
		FileInputFormat.addInputPath(job, new Path(in));
		FileOutputFormat.setOutputPath(job, new Path(out));
		return job;
	}
}
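Once packaged into a jar, the driver is submitted with the standard hadoop jar command, for example hadoop jar wordcount.jar WordCount <input_dir> <output_dir> (the jar name and directories here are placeholders). The output directory must not already exist, otherwise FileOutputFormat rejects the job before it starts.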

And the imports:

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
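The driver refers to WordCountMapper and WordCountReducer, which are not shown in this section. Below is a minimal sketch consistent with the imports above; the class bodies, the lower-casing and the \w+ tokenization regex are assumptions, not the original code. Each class goes in its own file together with the relevant imports.

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
	// Reusable output objects: Hadoop serializes them on every write.
	private static final IntWritable ONE = new IntWritable(1);
	private final Text word = new Text();
	// Assumed tokenization: runs of word characters, matched with java.util.regex.
	private static final Pattern WORD = Pattern.compile("\\w+");

	@Override
	public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
	{
		Matcher matcher = WORD.matcher(value.toString().toLowerCase());
		while (matcher.find()) {
			word.set(matcher.group());
			context.write(word, ONE);   // emit (word, 1)
		}
	}
}

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>
{
	private final IntWritable total = new IntWritable();

	@Override
	public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
	{
		// Sum all the 1s emitted by the mappers for this word.
		int sum = 0;
		for (IntWritable v : values) {
			sum += v.get();
		}
		total.set(sum);
		context.write(key, total);      // emit (word, total count)
	}
}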


Spark