-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path67914616.html
More file actions
381 lines (273 loc) · 20.9 KB
/
67914616.html
File metadata and controls
381 lines (273 loc) · 20.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width">
<meta name="theme-color" content="#222"><meta name="generator" content="Hexo 6.3.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon.png">
<link rel="mask-icon" href="/images/logo.svg" color="#222">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" integrity="sha256-HtsXJanqjKTc8vVQjO4YMhiqFoXkfBsjBWcX91T1jr8=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/3.1.1/animate.min.css" integrity="sha256-PR7ttpcvz8qrF57fur/yAx1qXMFJeJFiA6pSzWi0OIE=" crossorigin="anonymous">
<script class="next-config" data-name="main" type="application/json">{"hostname":"tallate.github.io","root":"/","images":"/images","scheme":"Gemini","darkmode":false,"version":"8.18.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12},"copycode":{"enable":false,"style":null},"fold":{"enable":false,"height":500},"bookmark":{"enable":false,"color":"#222","save":"auto"},"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"stickytabs":false,"motion":{"enable":true,"async":false,"transition":{"menu_item":"fadeInDown","post_block":"fadeIn","post_header":"fadeInDown","post_body":"fadeInDown","coll_header":"fadeInLeft","sidebar":"fadeInUp"}},"prism":false,"i18n":{"placeholder":"搜索...","empty":"没有找到任何搜索结果:${query}","hits_time":"找到 ${hits} 个搜索结果(用时 ${time} 毫秒)","hits":"找到 ${hits} 个搜索结果"}}</script><script src="/js/config.js"></script>
<meta name="description" content="Hadoop的基本使用。">
<meta property="og:type" content="article">
<meta property="og:title" content="Hadoop入门">
<meta property="og:url" content="https://tallate.github.io/67914616.html">
<meta property="og:site_name" content="Tallate">
<meta property="og:description" content="Hadoop的基本使用。">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="https://tallate.github.io/imgs/Hadoop/Hadoop%E7%94%9F%E6%80%81%E7%B3%BB%E7%BB%9F.png">
<meta property="article:published_time" content="2021-04-11T08:26:49.000Z">
<meta property="article:modified_time" content="2025-07-06T17:56:20.884Z">
<meta property="article:author" content="tallate">
<meta property="article:tag" content="Hadoop">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://tallate.github.io/imgs/Hadoop/Hadoop%E7%94%9F%E6%80%81%E7%B3%BB%E7%BB%9F.png">
<link rel="canonical" href="https://tallate.github.io/67914616.html">
<script class="next-config" data-name="page" type="application/json">{"sidebar":"","isHome":false,"isPost":true,"lang":"zh-CN","comments":true,"permalink":"https://tallate.github.io/67914616.html","path":"/67914616.html","title":"Hadoop入门"}</script>
<script class="next-config" data-name="calendar" type="application/json">""</script>
<title>Hadoop入门 | Tallate</title>
<noscript>
<link rel="stylesheet" href="/css/noscript.css">
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage" class="use-motion">
<div class="headband"></div>
<main class="main">
<div class="column">
<header class="header" itemscope itemtype="http://schema.org/WPHeader"><div class="site-brand-container">
<div class="site-nav-toggle">
<div class="toggle" aria-label="切换导航栏" role="button">
<span class="toggle-line"></span>
<span class="toggle-line"></span>
<span class="toggle-line"></span>
</div>
</div>
<div class="site-meta">
<a href="/" class="brand" rel="start">
<i class="logo-line"></i>
<p class="site-title">Tallate</p>
<i class="logo-line"></i>
</a>
<p class="site-subtitle" itemprop="description">该吃吃该喝喝 啥事别往心里搁</p>
</div>
<div class="site-nav-right">
<div class="toggle popup-trigger" aria-label="搜索" role="button">
<i class="fa fa-search fa-fw fa-lg"></i>
</div>
</div>
</div>
<nav class="site-nav">
<ul class="main-menu menu"><li class="menu-item menu-item-home"><a href="/" rel="section"><i class="home fa-fw"></i>首页</a></li><li class="menu-item menu-item-about"><a href="/about/" rel="section"><i class="user fa-fw"></i>关于</a></li><li class="menu-item menu-item-tags"><a href="/tags/" rel="section"><i class="tags fa-fw"></i>标签<span class="badge">84</span></a></li><li class="menu-item menu-item-categories"><a href="/categories/" rel="section"><i class="th fa-fw"></i>分类<span class="badge">25</span></a></li><li class="menu-item menu-item-archives"><a href="/archives/" rel="section"><i class="archive fa-fw"></i>归档<span class="badge">192</span></a></li>
<li class="menu-item menu-item-search">
<a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
</a>
</li>
</ul>
</nav>
<div class="search-pop-overlay">
<div class="popup search-popup"><div class="search-header">
<span class="search-icon">
<i class="fa fa-search"></i>
</span>
<div class="search-input-container">
<input autocomplete="off" autocapitalize="off" maxlength="80"
placeholder="搜索..." spellcheck="false"
type="search" class="search-input">
</div>
<span class="popup-btn-close" role="button">
<i class="fa fa-times-circle"></i>
</span>
</div>
<div class="search-result-container no-result">
<div class="search-result-icon">
<i class="fa fa-spinner fa-pulse fa-5x"></i>
</div>
</div>
</div>
</div>
</header>
<aside class="sidebar">
<div class="sidebar-inner sidebar-nav-active sidebar-toc-active">
<ul class="sidebar-nav">
<li class="sidebar-nav-toc">
文章目录
</li>
<li class="sidebar-nav-overview">
站点概览
</li>
</ul>
<div class="sidebar-panel-container">
<!--noindex-->
<div class="post-toc-wrap sidebar-panel">
<div class="post-toc animated"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#Hadoop%E7%94%9F%E6%80%81"><span class="nav-number">1.</span> <span class="nav-text">Hadoop生态</span></a></li><li class="nav-item nav-level-1"><a class="nav-link" href="#%E5%AE%89%E8%A3%85"><span class="nav-number">2.</span> <span class="nav-text">安装</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#run-demo"><span class="nav-number">2.1.</span> <span class="nav-text">run demo</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E4%BC%AA%E5%88%86%E5%B8%83%E5%BC%8F%E5%AE%89%E8%A3%85"><span class="nav-number">2.2.</span> <span class="nav-text">伪分布式安装</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%88%86%E5%B8%83%E5%BC%8F%E5%AE%89%E8%A3%85"><span class="nav-number">2.3.</span> <span class="nav-text">分布式安装</span></a></li></ol></li></ol></div>
</div>
<!--/noindex-->
<div class="site-overview-wrap sidebar-panel">
<div class="site-author animated" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name">tallate</p>
<div class="site-description" itemprop="description"></div>
</div>
<div class="site-state-wrap animated">
<nav class="site-state">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">192</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/">
<span class="site-state-item-count">25</span>
<span class="site-state-item-name">分类</span></a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/">
<span class="site-state-item-count">84</span>
<span class="site-state-item-name">标签</span></a>
</div>
</nav>
</div>
</div>
</div>
</div>
</aside>
</div>
<div class="main-inner post posts-expand">
<div class="post-block">
<article itemscope itemtype="http://schema.org/Article" class="post-content" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://tallate.github.io/67914616.html">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="tallate">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Tallate">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="post" itemscope itemtype="http://schema.org/CreativeWork">
<meta itemprop="name" content="Hadoop入门 | Tallate">
<meta itemprop="description" content="">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
Hadoop入门
</h1>
<div class="post-meta-container">
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2021-04-11 16:26:49" itemprop="dateCreated datePublished" datetime="2021-04-11T16:26:49+08:00">2021-04-11</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2025-07-07 01:56:20" itemprop="dateModified" datetime="2025-07-07T01:56:20+08:00">2025-07-07</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/Hadoop/" itemprop="url" rel="index"><span itemprop="name">Hadoop</span></a>
</span>
</span>
</div>
</div>
</header>
<div class="post-body" itemprop="articleBody"><p>Hadoop的基本使用。</p>
<span id="more"></span>
<h1 id="Hadoop生态"><a href="#Hadoop生态" class="headerlink" title="Hadoop生态"></a>Hadoop生态</h1><p><img src="/imgs/Hadoop/Hadoop%E7%94%9F%E6%80%81%E7%B3%BB%E7%BB%9F.png" alt="Hadoop生态系统" title="Hadoop生态系统"></p>
<ul>
<li>使用MapReduce处理HBase中的海量数据</li>
<li>利用ZooKeeper作为协同服务</li>
<li>HDFS作为高可靠的底层存储,利用廉价集群提供海量数据存储能力<br>当然HBase也可以使用本地文件系统而不用HDFS作为底层数据存储方式,不过为了提高数据可靠性和系统健壮性,发挥HBase处理大数据量的能力,一般都使用HDFS作为HBase的底层数据存储方式。</li>
<li>Sqoop提供了数据导入功能,比如将RDBMS数据导入到HBase</li>
<li>Pig和Hive为HBase提供了高层语言支持</li>
</ul>
<h1 id="安装"><a href="#安装" class="headerlink" title="安装"></a>安装</h1><h2 id="run-demo"><a href="#run-demo" class="headerlink" title="run demo"></a>run demo</h2><p>从官网下载:<a target="_blank" rel="noopener" href="http://hadoop.apache.org/">http://hadoop.apache.org/</a></p>
<p><strong>事前确保JAVA_HOME已经设置好</strong></p>
<p>查看版本号(运行前确保JAVA_HOME已设置):</p>
<figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">./bin/hadoop version</span><br></pre></td></tr></table></figure>
<p>运行测试用例:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">mkdir input</span><br><span class="line">cp etc/hadoop/*.xml input</span><br><span class="line">./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce</span><br><span class="line">-examples-3.2.2.jar grep ./input ./output 'dfs[a-z.]+'</span><br><span class="line"># 查看结果</span><br><span class="line">cat output/*</span><br><span class="line"># 输出:1 dfsadmin</span><br></pre></td></tr></table></figure>
<h2 id="伪分布式安装"><a href="#伪分布式安装" class="headerlink" title="伪分布式安装"></a>伪分布式安装</h2><p>单节点启动:<a target="_blank" rel="noopener" href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html">https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html</a></p>
<p>hadoop里的配置文件都在etc目录下面:</p>
<ul>
<li>hadoop-env.sh:配置环境变量</li>
<li>core-site.xml:Hadoop core的配置项,如HDFS和MapReduce常用的IO配置等。</li>
<li>hdfs-site.xml:Hadoop守护进程的配置,包括NameNode、Secondary NameNode和DataNode等</li>
<li>mapred-site.xml:MapReduce守护进程的配置项,包括JobTracker和TaskTracker</li>
<li>masters:运行SecondaryNameNode的机器列表</li>
<li>slaves:运行DataNode和TaskTracker的机器列表</li>
<li>hadoop-metrics.properties:控制metrics在Hadoop上如何发布的属性</li>
</ul>
<p>修改core-site.xml,指定HDFS的地址和端口号:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><span class="line"><configuration></span><br><span class="line"> <property></span><br><span class="line"> <name>hadoop.tmp.dir</name></span><br><span class="line"> <value>file:/usr/local/hadoop/tmp</value></span><br><span class="line"> </property></span><br><span class="line"> <property></span><br><span class="line"> <name>fs.defaultFS</name></span><br><span class="line"> <value>hdfs://localhost:9000</value></span><br><span class="line"> </property></span><br><span class="line"></configuration></span><br></pre></td></tr></table></figure>
<p>修改hdfs-site.xml,配置副本数量、元数据存放的路径、数据存放的路径:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line"><configuration></span><br><span class="line"> <property></span><br><span class="line"> <name>dfs.replication</name></span><br><span class="line"> <value>1</value></span><br><span class="line"> </property></span><br><span class="line"> <property></span><br><span class="line"> <name>dfs.namenode.name.dir</name></span><br><span class="line"> <value>file:/usr/local/hadoop/tmp/dfs/data</value></span><br><span class="line"> </property></span><br><span class="line"> <property></span><br><span class="line"> <name>dfs.datanode.data.dir</name></span><br><span class="line"> <value>file:/usr/local/hadoop/tmp/dfs/data</value></span><br><span class="line"> </property></span><br><span class="line"></configuration></span><br></pre></td></tr></table></figure>
<p>初始化HDFS文件系统:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">./bin/hadoop namenode -format</span><br></pre></td></tr></table></figure>
<h2 id="分布式安装"><a href="#分布式安装" class="headerlink" title="分布式安装"></a>分布式安装</h2><p>TODO</p>
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/[email protected]/dist/kity.min.js"></script><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/[email protected]/dist/kityminder.core.min.js"></script><script defer="true" type="text/javascript" src="https://cdn.jsdelivr.net/npm/[email protected]/dist/mindmap.min.js"></script><link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/mindmap.min.css">
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/Hadoop/" rel="tag"># Hadoop</a>
</div>
<div class="post-nav">
<div class="post-nav-item">
<a href="/e04c766b.html" rel="prev" title="Kafka原理总结">
<i class="fa fa-angle-left"></i> Kafka原理总结
</a>
</div>
<div class="post-nav-item">
<a href="/9863f346.html" rel="next" title="ES5_2创建Index原则">
ES5_2创建Index原则 <i class="fa fa-angle-right"></i>
</a>
</div>
</div>
</footer>
</article>
</div>
</div>
</main>
<footer class="footer">
<div class="footer-inner">
<div class="copyright">
©
<span itemprop="copyrightYear">2025</span>
<span class="with-love">
<i class="fa fa-heart"></i>
</span>
<span class="author" itemprop="copyrightHolder">tallate</span>
</div>
<div class="powered-by">由 <a href="https://hexo.io/" rel="noopener" target="_blank">Hexo</a> & <a href="https://theme-next.js.org/" rel="noopener" target="_blank">NexT.Gemini</a> 强力驱动
</div>
</div>
</footer>
<div class="back-to-top" role="button" aria-label="返回顶部">
<i class="fa fa-arrow-up fa-lg"></i>
<span>0%</span>
</div>
<a href="https://github.com/tallate" class="github-corner" title="在 GitHub 上关注我" aria-label="在 GitHub 上关注我" rel="noopener" target="_blank"><svg width="80" height="80" viewBox="0 0 250 250" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a>
<noscript>
<div class="noscript-warning">Theme NexT works best with JavaScript enabled</div>
</noscript>
<script src="https://cdnjs.cloudflare.com/ajax/libs/animejs/3.2.1/anime.min.js" integrity="sha256-XL2inqUJaslATFnHdJOi9GfQ60on8Wx1C2H8DYiN1xY=" crossorigin="anonymous"></script>
<script src="/js/comments.js"></script><script src="/js/utils.js"></script><script src="/js/motion.js"></script><script src="/js/next-boot.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/hexo-generator-searchdb/1.4.1/search.js" integrity="sha256-1kfA5uHPf65M5cphT2dvymhkuyHPQp5A53EGZOnOLmc=" crossorigin="anonymous"></script>
<script src="/js/third-party/search/local-search.js"></script>
<script class="next-config" data-name="mermaid" type="application/json">{"enable":true,"version":"7.1.2","options":null,"js":{"url":"https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.3.0/mermaid.min.js","integrity":"sha256-9y71g5Lz/KLsHjB8uXwnkuWDtAMDSzD/HdIbqhJfTAI="}}</script>
<script src="/js/third-party/tags/mermaid.js"></script>
</body>
</html>