孙宇的技术专栏 大数据/机器学习

HBase-PHP

2016-09-06

阅读:


安装

cd /data/soft/
wget http://archive.apache.org/dist/hbase/hbase-0.94.9/hbase-0.94.9.tar.gz
tar -zxf hbase-0.94.9.tar.gz
mv hbase-0.94.9 /usr/local/hbase

注意:下载的时候一定要注意版本。通过 Java 客户端连接时,如果客户端 jar 和服务端的版本不一致,会连接失败。如果已经安装过一个版本,想换装另一个版本,一定要先把 /tmp/hbase* 目录及文件全部删掉。

修改 /etc/profile 添加:

export HBASE_HOME=/usr/local/hbase

启动 hbase:

/usr/local/hbase/bin/start-hbase.sh
starting master, logging to /usr/local/hbase/bin/../logs/hbase-root-master-localhost.out

启动后,60010 端口(Master 的 Web 管理端口)会开始监听。同时,会自动启动 zookeeper,它监听 2181 端口。

可以访问管理端:http://192.168.192.110:60010/master-status

hbase 的配置信息主要在两个文件里: hbase-env.sh 和 hbase-site.xml。这两个文件在安装目录下的 conf 目录,即:/usr/local/hbase/conf/。

在单机模式的默认设置里,HBase把数据写到了 /tmp 下,但此处不宜久留。我们可以更改 hbase-site.xml 文件,添加下面的配置来修改数据目录:

<property>
	<name>hbase.rootdir</name>
	<value>/data/hbase/</value>
</property>

改完后,重新启动 hbase(执行 /usr/local/hbase/bin/stop-hbase.sh 和 /usr/local/hbase/bin/start-hbase.sh)。启动后,/data/hbase/目录下会有数据文件和目录。

hbase - php

php 操作 hbase 是基于 thrift 的。

启动 hbase 和 hbase 的 thrift 服务

/usr/local/hbase/bin/start-hbase.sh
/usr/local/hbase/bin/hbase thrift -p 9090 start

生成 php 的 hbase-thrift 客户端

/usr/local/bin/thrift --gen php /usr/local/hbase/src/main/resources/org/apache/hadoop/hbase/thrift/Hbase.thrift

或:

/usr/local/bin/thrift --gen php /usr/local/hbase/src/main/resources/org/apache/hadoop/hbase/thrift2/hbase.thrift

后一条命令生成的是 thrift2 的客户端代码,它的接口和 thrift 的接口有许多不一样。

执行完后当前目录下会有 gen-php 目录,目录下有文件: Hbase.php, Types.php

把这两个文件复制到测试代码的项目目录,如:

cp gen-php/* /home/httpd/sites/hbase

测试代码:

<?php

use Thrift\Protocol\TBinaryProtocol;
use Thrift\Transport\TSocket;
use Thrift\Transport\THttpClient;
use Thrift\Transport\TBufferedTransport;
use Thrift\Exception\TException;

# Runtime settings for this test script: display errors, raise the memory
# limit, report only E_ERROR-level errors, and allow up to 600 seconds of
# execution time.
foreach (array('display_errors' => '1', 'memory_limit' => '2048m') as $option => $setting) {
  ini_set($option, $setting);
}
error_reporting(E_ERROR);
set_time_limit(600);

# Root directory of the Thrift PHP library; every file below is loaded
# relative to it.
$GLOBALS['THRIFT_ROOT'] = '/usr/local/edengweb/php/Thrift';

# Thrift runtime files first, then the generated HBase client code
# (packages/Types.php and packages/Hbase.php). The order is kept exactly as
# listed.
$thriftFiles = array(
  '/Thrift.php',
  '/Transport/TSocket.php',
  '/Transport/TBufferedTransport.php',
  '/Protocol/TBinaryProtocol.php',
  '/Type/TMessageType.php',
  '/Type/TType.php',
  '/Factory/TStringFuncFactory.php',
  '/StringFunc/TStringFunc.php',
  '/StringFunc/Core.php',
  '/Exception/TException.php',
  '/packages/Types.php',
  '/packages/Hbase.php',
);
foreach ($thriftFiles as $relativePath) {
  require_once $GLOBALS['THRIFT_ROOT'] . $relativePath;
}

# Open a buffered, binary-protocol Thrift connection to localhost:9090 and
# build the HBase client on top of it.
$socket = new TSocket('localhost', 9090);
$socket->setSendTimeout(10000); // 10 s send timeout (generous because this is only a test)
$socket->setRecvTimeout(20000); // 20 s receive timeout

$transport = new TBufferedTransport($socket);
$client = new \HbaseClient(new TBinaryProtocol($transport));

$transport->open();

# Name of the table this demo works on.
$t = 'demo_table';

# List every table in sorted order. If the demo table already exists,
# disable it (when it is enabled) and delete it.
echo "scanning tables...\n";
$tables = $client->getTableNames();
sort($tables);
foreach ($tables as $name) {
  echo "  found: {$name}\n";
  if ($name != $t) {
    continue;
  }
  if ($client->isTableEnabled($name)) {
    echo "    disabling table: {$name}\n";
    $client->disableTable($name);
  }
  echo "    deleting table: {$name}\n";
  $client->deleteTable($name);
}

# Two column families: "entry:" with maxVersions set to 10, and "unused:"
# with only its name specified.
$entryFamily = new ColumnDescriptor(array('name' => 'entry:', 'maxVersions' => 10));
$unusedFamily = new ColumnDescriptor(array('name' => 'unused:'));

echo "creating table: {$t}\n";
try {
  $client->createTable($t, array($entryFamily, $unusedFamily));
} catch (AlreadyExists $ae) {
  # The table is already there: print a warning and carry on.
  echo "WARN: {$ae->message}\n";
}

# Print the name and maxVersions of each column family of the table,
# after sorting the descriptors with asort().
echo "column families in {$t}:\n";
$families = $client->getColumnDescriptors($t);
asort($families);
foreach ($families as $col) {
  echo "  column: {$col->name}, maxVer: {$col->maxVersions}\n";
}

# Sample payloads: $invalid holds bytes that are not UTF-8, $valid holds a
# UTF-8 string.
$invalid = "foo-\xfc\xa1\xa1\xa1\xa1\xa1";
$valid = "foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB";

# Each entry is (row key, column, value); rows are written in this order:
#   1. non-utf8 is fine for data
#   2. try empty strings
#   3. this row name is valid utf8
$writes = array(
  array("foo", 'entry:foo', $invalid),
  array("", 'entry:', ""),
  array($valid, 'entry:foo', $valid),
);
foreach ($writes as $write) {
  list($rowKey, $column, $value) = $write;
  $mutations = array(
    new Mutation(array(
      'column' => $column,
      'value' => $value
    )),
  );
  $client->mutateRow($t, $rowKey, $mutations, array());
}

# Run a scanner on the rows we just created.
# The scan starts at row key "1" and requests only the entry:foo column; the
# column argument is passed as a plain list of column names.
echo "Starting scanner...\n";
$scanner = $client->scannerOpen($t, "1", array('entry:foo'), array());
try {
  $nbRows = 100; # maximum number of rows requested in one call
  $arr = $client->scannerGetList($scanner, $nbRows);
  print_r($arr);
} catch (NotFound $nf) {
  echo "Scanner finished\n";
}
# Close the scanner on the normal path as well as after NotFound, so the
# scanner ID obtained from scannerOpen() is always released.
$client->scannerClose($scanner);

$transport->close();

?>

上一篇 HBase

评论

文章