
I am writing a map reduce job to convert a text file to ORC format. I am using OrcNewOutputFormat, which is provided by the hive-exec-0.13.0 jar, on a 15-node cluster (HDP 2.3). The problem is that I am not able to load my external jar (hive-exec-0.13.0.jar), which is needed by my map reduce code. I have also tried other options such as -libjars and the distributed cache, but the job still cannot find the jar. Is there another way to make the jar available, or another way to convert a text file to ORC format with map reduce?

//Driver Code 
import java.io.IOException; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 

public class Driver { 

    public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException 
    { 
     Configuration conf = new Configuration(); 
     // Set ORC options before creating the Job, because Job copies the Configuration 
     conf.set("orc.create.index", "true"); 
     Job job = Job.getInstance(conf, "text-to-orc"); 
     job.setJarByClass(Driver.class); 
     job.setMapperClass(MapORC.class); 
     job.setReducerClass(ReducerORC.class); 
     job.setMapOutputKeyClass(Text.class); 
     job.setMapOutputValueClass(Text.class); 
     job.setOutputKeyClass(NullWritable.class); 
     job.setOutputValueClass(Writable.class); 
     // Without this the default TextOutputFormat is used and no ORC file is written 
     job.setOutputFormatClass(OrcNewOutputFormat.class); 
     OrcNewOutputFormat.setCompressOutput(job, true); 
     FileInputFormat.addInputPath(job, new Path(args[0])); 
     OrcNewOutputFormat.setOutputPath(job, new Path(args[1])); 
     System.exit(job.waitForCompletion(true) ? 0 : 1); 
    } 
} 
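
Note: -libjars is only picked up when the driver goes through Hadoop's generic option parsing, for example via ToolRunner. A minimal sketch of a ToolRunner-based driver is shown below; the class name OrcDriver and the job name are only illustrative, everything else mirrors the code above.

//Driver variant using ToolRunner so that -libjars and -D options are honoured (sketch) 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

public class OrcDriver extends Configured implements Tool { 

    @Override 
    public int run(String[] args) throws Exception { 
     // getConf() already reflects anything passed via -D and -libjars 
     Configuration conf = getConf(); 
     conf.set("orc.create.index", "true"); 
     Job job = Job.getInstance(conf, "text-to-orc"); 
     job.setJarByClass(OrcDriver.class); 
     job.setMapperClass(MapORC.class); 
     job.setReducerClass(ReducerORC.class); 
     job.setMapOutputKeyClass(Text.class); 
     job.setMapOutputValueClass(Text.class); 
     job.setOutputKeyClass(NullWritable.class); 
     job.setOutputValueClass(Writable.class); 
     job.setOutputFormatClass(OrcNewOutputFormat.class); 
     OrcNewOutputFormat.setCompressOutput(job, true); 
     FileInputFormat.addInputPath(job, new Path(args[0])); 
     OrcNewOutputFormat.setOutputPath(job, new Path(args[1])); 
     return job.waitForCompletion(true) ? 0 : 1; 
    } 

    public static void main(String[] args) throws Exception { 
     // ToolRunner strips the generic options (-libjars, -D, ...) before calling run() 
     System.exit(ToolRunner.run(new Configuration(), new OrcDriver(), args)); 
    } 
} 

With such a driver the job could be launched as hadoop jar <your-job-jar> OrcDriver -libjars /path/to/hive-exec-0.13.0.jar <input> <output>; without ToolRunner (or GenericOptionsParser) the -libjars argument is simply passed through to main() and ignored.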

.

//Mapper Code 
import java.io.IOException; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Mapper; 

public class MapORC extends Mapper<LongWritable, Text, Text, Text> 
{ 
    @Override 
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException 
    { 
     // Split each input line into an id and the rest of the record at the first comma 
     String[] split = value.toString().split(",", 2); 
     if (split.length < 2) { 
      return; // skip malformed lines that contain no comma 
     } 
     String id = split[0]; 
     String others = split[1]; 
     context.write(new Text(id), new Text(others)); 
    } 
} 

.

//Reducer Code 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.List; 
import org.apache.hadoop.hive.ql.io.orc.OrcSerde; 
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.mapreduce.Reducer; 

public class ReducerORC extends Reducer<Text, Text, NullWritable, Writable> { 

    private final OrcSerde orcSerde = new OrcSerde(); 

    // Hive type names are lower-case; each serialized row should have one entry per struct field 
    private final String structType = "struct<name:string,age:string>"; 
    private final TypeInfo reduceTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(structType); 
    private final ObjectInspector inspector = 
      TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(reduceTypeInfo); 

    @Override 
    protected void reduce(Text key, Iterable<Text> values, Context context) 
      throws IOException, InterruptedException { 

     // The standard Java ObjectInspector expects plain Java Strings, not Text 
     List<Object> orcRecord = new ArrayList<Object>(); 
     orcRecord.add(key.toString()); 
     for (Text value : values) { 
      orcRecord.add(value.toString()); 
     } 

     Writable row = orcSerde.serialize(orcRecord, inspector); 
     context.write(NullWritable.get(), row); 
    } 
} 
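
For reference, the standard Java ObjectInspector built from struct<name:string,age:string> expects each row to be a List of plain Java Strings, one per field and in declaration order. A self-contained sketch (the field values are made up for illustration):

//Sketch: serializing one row with OrcSerde against a struct ObjectInspector 
import java.util.Arrays; 
import java.util.List; 
import org.apache.hadoop.hive.ql.io.orc.OrcSerde; 
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; 
import org.apache.hadoop.io.Writable; 

public class OrcRowSketch { 
    public static void main(String[] args) { 
     ObjectInspector inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( 
       TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:string>")); 
     // The row must match the struct: one Java String per field, in declaration order 
     List<Object> row = Arrays.<Object>asList("alice", "30"); 
     Writable serialized = new OrcSerde().serialize(row, inspector); 
     System.out.println(serialized); 
    } 
} 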

.

Error for the above code

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hive/ql/io/orc/OrcNewOutputFormat 
     at Driver.main(Driver.java:27) 
     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
     at java.lang.reflect.Method.invoke(Method.java:606) 
     at org.apache.hadoop.util.RunJar.run(RunJar.java:221) 
     at org.apache.hadoop.util.RunJar.main(RunJar.java:136) 
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat 
     at java.net.URLClassLoader$1.run(URLClassLoader.java:366) 
     at java.net.URLClassLoader$1.run(URLClassLoader.java:355) 
     at java.security.AccessController.doPrivileged(Native Method) 
     at java.net.URLClassLoader.findClass(URLClassLoader.java:354) 
     at java.lang.ClassLoader.loadClass(ClassLoader.java:425) 
     at java.lang.ClassLoader.loadClass(ClassLoader.java:358) 
     ... 7 more 

Answer


Have you set your HADOOP_CLASSPATH to include the jar you want?

If that does not work, what you are looking for may be HADOOP_USER_CLASSPATH_FIRST=true,

which makes Hadoop prefer the jar versions in your HADOOP_CLASSPATH over the ones that ship with the Hadoop distribution.
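
If the client-side classpath is sorted out but the map and reduce tasks still cannot see the classes, one further option is to put the jar on the task classpath from the driver. A sketch, assuming the jar has already been copied to an HDFS path of your choosing:

//Sketch: shipping a jar that already sits on HDFS to every task's classpath 
import java.io.IOException; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.mapreduce.Job; 

public class ClasspathSetup { 
    // The HDFS path below is an assumption; adjust it to wherever the jar lives on your cluster 
    public static void addHiveExec(Job job) throws IOException { 
     job.addFileToClassPath(new Path("/apps/lib/hive-exec-0.13.0.jar")); 
    } 
} 

For the NoClassDefFoundError shown above, though, the failure happens in the client JVM inside Driver.main before the job is even submitted, so exporting HADOOP_CLASSPATH as described is the first thing to check.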