I am writing a MapReduce job to convert a text file to ORC format. I am using OrcNewOutputFormat, which ships in the hive-exec-0.13.0 jar. I am working on a 15-node cluster (HDP 2.3). The problem is that I am not able to load my external jar (hive-exec-0.13.0.jar) so that it can be accessed from my MapReduce code. I have also tried other options such as -libjars and the distributed cache, but the code is still not able to find the jar. Is there any other way to do this, i.e. to convert a text file to ORC format with MapReduce?
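For reference, the distributed-cache attempt looked roughly like the following in the driver (the HDFS path is only a placeholder; the jar has to be uploaded to HDFS first):

Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "join");
// puts the jar on the classpath of every map and reduce task
// (placeholder path; the jar must already exist at this HDFS location)
job.addFileToClassPath(new Path("/user/hdfs/lib/hive-exec-0.13.0.jar"));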
//Driver Code
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class Driver {
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException
{
Configuration conf = new Configuration();
// must be set before the Job is created: Job copies the Configuration,
// so setting it afterwards has no effect on the submitted job
conf.set("orc.create.index", "true");
Job job = Job.getInstance(conf, "join");
job.setJarByClass(Driver.class);
job.setMapperClass(MapORC.class);
job.setReducerClass(ReducerORC.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Writable.class);
// without this the job silently falls back to the default TextOutputFormat
job.setOutputFormatClass(OrcNewOutputFormat.class);
OrcNewOutputFormat.setCompressOutput(job, true);
OrcNewOutputFormat.setOutputPath(job, new Path(args[1]));
FileInputFormat.addInputPath(job, new Path(args[0]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
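As far as I know, -libjars is only honored when the driver parses the generic options via ToolRunner/GenericOptionsParser, which the driver above does not do, and even then it only helps the task side; the NoClassDefFoundError below is already raised in the client JVM, which would need the jar on HADOOP_CLASSPATH as well. A minimal ToolRunner variant of the driver (untested sketch; the class name DriverTool is mine):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class DriverTool extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // getConf() already carries whatever -libjars/-D options
        // GenericOptionsParser stripped from the command line
        Configuration conf = getConf();
        conf.set("orc.create.index", "true");
        Job job = Job.getInstance(conf, "join");
        job.setJarByClass(DriverTool.class);
        job.setMapperClass(MapORC.class);
        job.setReducerClass(ReducerORC.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Writable.class);
        job.setOutputFormatClass(OrcNewOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        OrcNewOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // invoked e.g. as:
        // hadoop jar myjob.jar DriverTool -libjars hive-exec-0.13.0.jar <in> <out>
        System.exit(ToolRunner.run(new Configuration(), new DriverTool(), args));
    }
}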
//Mapper Code
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MapORC extends Mapper<LongWritable, Text, Text, Text>
{
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
// each input line is assumed to look like "id,rest-of-record":
// split on the first comma only
String[] split = value.toString().split(",", 2);
String id = split[0];
String others = split[1];
context.write(new Text(id), new Text(others));
}
}
//Reducer Code
import java.util.ArrayList;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
public class ReducerORC extends Reducer<Text,Text,NullWritable,Writable> {
private final OrcSerde orcSerde = new OrcSerde();
private Writable row;
// Hive type names are lowercase; "String" is not a valid Hive type name
private final String structDef = "struct<name:string,age:string>";
private final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structDef);
// a *Java* object inspector describes plain Java objects (String), not Text
private final ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
// the Java object inspector above expects the struct fields as Java Strings,
// so convert the Text key and values before serializing
List<Object> orcRecord = new ArrayList<Object>();
orcRecord.add(key.toString());
for (Text value : values) {
orcRecord.add(value.toString());
}
this.row = orcSerde.serialize(orcRecord, inspector);
context.write(NullWritable.get(), this.row);
}
}
Error for the above code; note that the exception is thrown in the client JVM (thread "main") while the driver is starting, before the job is even submitted:
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat
at Driver.main(Driver.java:27)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat
at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
... 7 more
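For completeness: once the classpath problem is solved, this is roughly how I would read the ORC output back to verify the conversion. This is only a sketch against the reader API as I understand it in hive-exec-0.13.0; the class name OrcDump and the path argument are placeholders of mine.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class OrcDump {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // args[0] would be one output file of the job, e.g. .../part-r-00000
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(conf);
        Reader reader = OrcFile.createReader(fs, path);
        RecordReader rows = reader.rows();
        Object row = null;
        while (rows.hasNext()) {
            // each row comes back as the struct the reducer serialized
            row = rows.next(row);
            System.out.println(row);
        }
        rows.close();
    }
}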