Skip to content

Commit

Permalink
HADOOP-4230. Fix for serde2 interface, limit operator, select * opera…
Browse files Browse the repository at this point in the history
…tor,

UDF trim functions and sampling. (Ashish Thusoo via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk/src/contrib/hive@706704 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Dhruba Borthakur committed Oct 21, 2008
1 parent 42b06ed commit eafbeca
Show file tree
Hide file tree
Showing 405 changed files with 28,154 additions and 6,709 deletions.
38 changes: 28 additions & 10 deletions bin/hive
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,36 @@ done
# add the auxiliary jars such as serdes
if [ -d "${HIVE_AUX_JARS_PATH}" ]; then
for f in ${HIVE_AUX_JARS_PATH}/*.jar; do
AUX_CLASSPATH=${AUX_CLASSPATH}:$f;
if [[ ! -f $f ]]; then
continue;
fi
AUX_CLASSPATH=${AUX_CLASSPATH}:$f
if [ "${AUX_PARAM}" == "" ]; then
AUX_PARAM=$f
AUX_PARAM=file://$f
else
AUX_PARAM=${AUX_PARAM},$f;
AUX_PARAM=${AUX_PARAM},file://$f;
fi
done
else
elif [ "${HIVE_AUX_JARS_PATH}" != "" ]; then
AUX_CLASSPATH=${HIVE_AUX_JARS_PATH}
AUX_PARAM=${HIVE_AUX_JARS_PATH}
AUX_PARAM=file://${HIVE_AUX_JARS_PATH}
AUX_PARAM=`echo $AUX_PARAM | sed 's/,/,file:\/\//g'`
fi
CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}

# adding jars from auxlib directory
for f in ${HIVE_HOME}/auxlib/*.jar; do
if [[ ! -f $f ]]; then
continue;
fi

AUX_CLASSPATH=${AUX_CLASSPATH}:$f
if [ "${AUX_PARAM}" == "" ]; then
AUX_PARAM=file://$f
else
AUX_PARAM=${AUX_PARAM},file://$f;
fi
done
CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}

# pass classpath to hadoop
export HADOOP_CLASSPATH=${CLASSPATH}
Expand All @@ -79,23 +96,24 @@ fi

HADOOP=$HADOOP_HOME/bin/hadoop
if [ ! -f ${HADOOP} ]; then
echo "Cannot find hadoop installation: \"$HADOOP\" does not exist";
echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
exit 4;
fi

# override default value of hadoop.bin.path to point to what we are running off
# if the user specified a -D override - this will be further overridden
export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.bin.path=$HADOOP"
export HIVE_OPTS="-hiveconf hadoop.bin.path=$HADOOP"

CLASS=org.apache.hadoop.hive.cli.CliDriver

if [ "${AUX_PARAM}" != "" ]; then
HADOOP_OPTS="-Dhive.aux.jars.path=${AUX_PARAM}"
HIVE_OPTS="$HIVE_OPTS -hiveconf hive.aux.jars.path=${AUX_PARAM}"
AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
fi
#echo "CLASSPATH="$CLASSPATH
#echo "AUX_LIB_PATH"=$AUX_PARAM
#echo "HADOOP_OPTS="$HADOOP_OPTS
# note: $@ only works in "$@"
exec $HADOOP jar $AUX_JARS_CMD_LINE ${HIVE_LIB}/hive_cli.jar $CLASS "$@"
#echo $HADOOP jar $AUX_JARS_CMD_LINE ${HIVE_LIB}/hive_cli.jar $CLASS $HIVE_OPTS "$@"
exec $HADOOP jar $AUX_JARS_CMD_LINE ${HIVE_LIB}/hive_cli.jar $CLASS $HIVE_OPTS "$@"

5 changes: 4 additions & 1 deletion build-common.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
<property name="test.build.classes" value="${test.build.dir}/classes"/>
<property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
<property name="test.include" value="Test*"/>
<property name="test.exclude" value="TestSerDe"/>
<property name="test.classpath.id" value="test.classpath"/>
<property name="test.output" value="true"/>
<property name="test.timeout" value="900000"/>
Expand All @@ -64,6 +65,7 @@
<pathelement location="${hadoop.conf.dir}"/>
<pathelement location="${test.data.dir}/conf"/>
<pathelement location="${hive.conf.dir}"/>
<fileset dir="${test.src.data.dir}" includes="files/*.jar"/>
<path refid="classpath"/>
</path>

Expand Down Expand Up @@ -95,6 +97,7 @@
<pathelement location="${hadoop.root}/build/contrib/hive/metastore/classes"/>
<pathelement location="${hadoop.root}/build/contrib/hive/ql/classes"/>
<pathelement location="${hadoop.root}/build/contrib/hive/cli/classes"/>
<fileset dir="${test.src.data.dir}" includes="files/*.jar"/>
<fileset dir="${basedir}" includes="lib/*.jar"/>
<path refid="common-classpath"/>
</path>
Expand Down Expand Up @@ -215,7 +218,7 @@
<sysproperty key="test.output.overwrite" value="${overwrite}"/>
<sysproperty key="log4j.configuration" value="file://${test.data.dir}/conf/hive-log4j.properties"/>
<sysproperty key="derby.stream.error.file" value="${test.build.dir}/derby.log"/>
<sysproperty key="hive.aux.jars.path" value="file://${test.build.dir}/test-udfs.jar"/>
<sysproperty key="hive.aux.jars.path" value="${test.build.dir}/test-udfs.jar,${test.src.data.dir}/files/TestSerDe.jar"/>
<classpath refid="${test.classpath.id}"/>
<formatter type="${test.junit.output.format}" />
<batchtest todir="${test.build.dir}" unless="testcase">
Expand Down
2 changes: 2 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,11 @@
<copy file="${basedir}/conf/hive-log4j.properties" todir="${target.conf.dir}"/>
<copy file="${basedir}/conf/jpox.properties" todir="${target.conf.dir}"/>
<copy todir="${target.lib.dir}/php">
<fileset dir="${hive.root}/serde/src/gen-php"/>
<fileset dir="${hive.root}/metastore/src/gen-php"/>
</copy>
<copy todir="${target.lib.dir}/py">
<fileset dir="${hive.root}/serde/src/gen-py"/>
<fileset dir="${hive.root}/metastore/src/gen-py"/>
</copy>
<copy todir="${target.lib.dir}" preservelastmodified="true" flatten="true">
Expand Down
79 changes: 58 additions & 21 deletions cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.io.*;
import java.util.*;

import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
Expand All @@ -35,9 +36,11 @@
public class CliDriver {

public final static String prompt = "hive";
public final static String prompt2 = " "; // when ';' is not yet seen

public static SetProcessor sp;
public static Driver qp;
public static FsShell dfs;

public CliDriver(CliSessionState ss) {
SessionState.start(ss);
Expand Down Expand Up @@ -81,30 +84,46 @@ public static int processCmd(String cmd) {
catch (Exception e) {
e.printStackTrace();
}

}
else {
} else if (cmd.startsWith("dfs")) {
// dfs shell commands
SessionState ss = SessionState.get();
if(dfs == null)
dfs = new FsShell(ss.getConf());
String hadoopCmd = cmd.replaceFirst("dfs\\s+", "");
hadoopCmd = hadoopCmd.trim();
if (hadoopCmd.endsWith(";")) {
hadoopCmd = hadoopCmd.substring(0, hadoopCmd.length()-1);
}
String[] args = hadoopCmd.split("\\s+");
try {
PrintStream oldOut = System.out;
System.setOut(ss.out);
int val = dfs.run(args);
System.setOut(oldOut);
if (val != 0) {
ss.err.write((new String("Command failed with exit code = " + val)).getBytes());
}
} catch (Exception e) {
ss.err.println("Exception raised from DFSShell.run " + e.getLocalizedMessage());
}
} else {
ret = qp.run(cmd);
Vector<Vector<String>> res = new Vector<Vector<String>>();
Vector<String> res = new Vector<String>();
while (qp.getResults(res)) {
SessionState ss = SessionState.get();
PrintStream out = ss.out;

for (Vector<String> row:res)
{
boolean firstCol = true;
for (String col:row)
{
if (!firstCol)
out.write(Utilities.tabCode);
out.print(col == null ? Utilities.nullStringOutput : col);
firstCol = false;
}
out.write(Utilities.newLineCode);
}
for (String r:res) {
SessionState ss = SessionState.get();
PrintStream out = ss.out;
out.println(r);
}
res.clear();
}

int cret = qp.close();
if (ret == 0) {
ret = cret;
}
}

return ret;
}

Expand Down Expand Up @@ -162,8 +181,15 @@ public static void main(String[] args) throws IOException {
System.exit(2);
}

// set all properties specified via command line
HiveConf conf = ss.getConf();
for(Map.Entry<Object, Object> item: ss.cmdProperties.entrySet()) {
conf.set((String) item.getKey(), (String) item.getValue());
}

sp = new SetProcessor();
qp = new Driver();
dfs = new FsShell(ss.getConf());

if(ss.execString != null) {
System.exit(processLine(ss.execString));
Expand Down Expand Up @@ -199,9 +225,20 @@ public static void main(String[] args) throws IOException {
int ret = 0;
Log LOG = LogFactory.getLog("CliDriver");
LogHelper console = new LogHelper(LOG);
while ((line = reader.readLine(prompt+"> ")) != null) {
String prefix = "";
String curPrompt = prompt;
while ((line = reader.readLine(curPrompt+"> ")) != null) {
long start = System.currentTimeMillis();
ret = processLine(line);
if(line.trim().endsWith(";")) {
line = prefix + " " + line;
ret = processLine(line);
prefix = "";
curPrompt = prompt;
} else {
prefix = prefix + line;
curPrompt = prompt2;
continue;
}
long end = System.currentTimeMillis();
if (end > start) {
double timeTaken = (double)(end-start)/1000.0;
Expand Down
7 changes: 7 additions & 0 deletions cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.hadoop.hive.cli;

import java.util.Properties;

import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Hive;
Expand All @@ -33,6 +35,11 @@ public class CliSessionState extends SessionState {
*/
public String fileName;

/**
* properties set from -hiveconf via cmdline
*/
public Properties cmdProperties = new Properties();


public CliSessionState() {
super();
Expand Down
9 changes: 9 additions & 0 deletions cli/src/java/org/apache/hadoop/hive/cli/OptionsProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,15 @@ public boolean process_stage2(CliSessionState ss) {
printUsage("-e and -f option cannot be specified simultaneously");
return false;
}

List<String> hiveConfArgs = (List<String>)cmdLine.getValue(confOptions);
if (null != hiveConfArgs){
for(String s : hiveConfArgs){
String []parts = s.split("=", 2);
ss.cmdProperties.setProperty(parts[0], parts[1]);
}
}

return true;
}

Expand Down
5 changes: 3 additions & 2 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public static enum ConfVars {
// for hive script operator
HIVETABLENAME("hive.table.name", ""),
HIVEPARTITIONNAME("hive.partition.name", ""),
HIVEPARTITIONPRUNER("hive.partition.pruning", "nonstrict"),
HIVEALIAS("hive.alias", "");

public final String varname;
Expand Down Expand Up @@ -170,13 +171,13 @@ private void initialize(Class cls) {
// let's add the hive configuration
URL hconfurl = getClassLoader().getResource("hive-default.xml");
if(hconfurl == null) {
l4j.warn("Unable to locate default hive configuration");
l4j.debug("hive-default.xml not found.");
} else {
addResource(hconfurl);
}
URL hsiteurl = getClassLoader().getResource("hive-site.xml");
if(hsiteurl == null) {
l4j.warn("Unable to locate hive site configuration");
l4j.debug("hive-site.xml not found.");
} else {
addResource(hsiteurl);
}
Expand Down
15 changes: 15 additions & 0 deletions conf/hive-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@
<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
<!-- resource). -->

<!-- Hadoop Setup -->
<property>
<name>hadoop.bin.path</name>
<value>${user.dir}/../../../bin/hadoop</value>
<!-- note that the hive shell script also uses this property name -->
<description>Path to hadoop binary. Assumes that by default we are executing from hive</description>
</property>

<property>
<name>hadoop.config.dir</name>
<value>${user.dir}/../../../conf</value>
<!-- note that the hive shell script also uses this property name -->
<description>Path to hadoop configuration. Again assumes that by default we are executing from hive/</description>
</property>

<!-- Hive Execution Parameters -->
<property>
<name>hive.exec.scratchdir</name>
Expand Down
Loading

0 comments on commit eafbeca

Please sign in to comment.