-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path59c58f8f.html
More file actions
448 lines (337 loc) · 44.3 KB
/
59c58f8f.html
File metadata and controls
448 lines (337 loc) · 44.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width">
<meta name="theme-color" content="#222"><meta name="generator" content="Hexo 6.3.0">
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon.png">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon.png">
<link rel="mask-icon" href="/images/logo.svg" color="#222">
<link rel="stylesheet" href="/css/main.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" integrity="sha256-HtsXJanqjKTc8vVQjO4YMhiqFoXkfBsjBWcX91T1jr8=" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/3.1.1/animate.min.css" integrity="sha256-PR7ttpcvz8qrF57fur/yAx1qXMFJeJFiA6pSzWi0OIE=" crossorigin="anonymous">
<script class="next-config" data-name="main" type="application/json">{"hostname":"tallate.github.io","root":"/","images":"/images","scheme":"Gemini","darkmode":false,"version":"8.18.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12},"copycode":{"enable":false,"style":null},"fold":{"enable":false,"height":500},"bookmark":{"enable":false,"color":"#222","save":"auto"},"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"stickytabs":false,"motion":{"enable":true,"async":false,"transition":{"menu_item":"fadeInDown","post_block":"fadeIn","post_header":"fadeInDown","post_body":"fadeInDown","coll_header":"fadeInLeft","sidebar":"fadeInUp"}},"prism":false,"i18n":{"placeholder":"搜索...","empty":"没有找到任何搜索结果:${query}","hits_time":"找到 ${hits} 个搜索结果(用时 ${time} 毫秒)","hits":"找到 ${hits} 个搜索结果"}}</script><script src="/js/config.js"></script>
<meta name="description" content="在 awk、sed、cut 三个命令中,awk 是功能最强大的,基本能实现所有字符串操作,平时常用于较复杂的日志分析,不过比起别的命令来也会相对复杂一点。">
<meta property="og:type" content="article">
<meta property="og:title" content="cut、awk 使用总结">
<meta property="og:url" content="https://tallate.github.io/59c58f8f.html">
<meta property="og:site_name" content="Tallate">
<meta property="og:description" content="在 awk、sed、cut 三个命令中,awk 是功能最强大的,基本能实现所有字符串操作,平时常用于较复杂的日志分析,不过比起别的命令来也会相对复杂一点。">
<meta property="og:locale" content="zh_CN">
<meta property="article:published_time" content="2019-07-20T12:34:48.000Z">
<meta property="article:modified_time" content="2025-07-06T17:56:20.875Z">
<meta property="article:author" content="tallate">
<meta property="article:tag" content="Linux">
<meta property="article:tag" content="脚本">
<meta name="twitter:card" content="summary">
<link rel="canonical" href="https://tallate.github.io/59c58f8f.html">
<script class="next-config" data-name="page" type="application/json">{"sidebar":"","isHome":false,"isPost":true,"lang":"zh-CN","comments":true,"permalink":"https://tallate.github.io/59c58f8f.html","path":"/59c58f8f.html","title":"cut、awk 使用总结"}</script>
<script class="next-config" data-name="calendar" type="application/json">""</script>
<title>cut、awk 使用总结 | Tallate</title>
<noscript>
<link rel="stylesheet" href="/css/noscript.css">
</noscript>
</head>
<body itemscope itemtype="http://schema.org/WebPage" class="use-motion">
<div class="headband"></div>
<main class="main">
<div class="column">
<header class="header" itemscope itemtype="http://schema.org/WPHeader"><div class="site-brand-container">
<div class="site-nav-toggle">
<div class="toggle" aria-label="切换导航栏" role="button">
<span class="toggle-line"></span>
<span class="toggle-line"></span>
<span class="toggle-line"></span>
</div>
</div>
<div class="site-meta">
<a href="/" class="brand" rel="start">
<i class="logo-line"></i>
<p class="site-title">Tallate</p>
<i class="logo-line"></i>
</a>
<p class="site-subtitle" itemprop="description">该吃吃该喝喝 啥事别往心里搁</p>
</div>
<div class="site-nav-right">
<div class="toggle popup-trigger" aria-label="搜索" role="button">
<i class="fa fa-search fa-fw fa-lg"></i>
</div>
</div>
</div>
<nav class="site-nav">
<ul class="main-menu menu"><li class="menu-item menu-item-home"><a href="/" rel="section"><i class="home fa-fw"></i>首页</a></li><li class="menu-item menu-item-about"><a href="/about/" rel="section"><i class="user fa-fw"></i>关于</a></li><li class="menu-item menu-item-tags"><a href="/tags/" rel="section"><i class="tags fa-fw"></i>标签<span class="badge">84</span></a></li><li class="menu-item menu-item-categories"><a href="/categories/" rel="section"><i class="th fa-fw"></i>分类<span class="badge">25</span></a></li><li class="menu-item menu-item-archives"><a href="/archives/" rel="section"><i class="archive fa-fw"></i>归档<span class="badge">192</span></a></li>
<li class="menu-item menu-item-search">
<a role="button" class="popup-trigger"><i class="fa fa-search fa-fw"></i>搜索
</a>
</li>
</ul>
</nav>
<div class="search-pop-overlay">
<div class="popup search-popup"><div class="search-header">
<span class="search-icon">
<i class="fa fa-search"></i>
</span>
<div class="search-input-container">
<input autocomplete="off" autocapitalize="off" maxlength="80"
placeholder="搜索..." spellcheck="false"
type="search" class="search-input">
</div>
<span class="popup-btn-close" role="button">
<i class="fa fa-times-circle"></i>
</span>
</div>
<div class="search-result-container no-result">
<div class="search-result-icon">
<i class="fa fa-spinner fa-pulse fa-5x"></i>
</div>
</div>
</div>
</div>
</header>
<aside class="sidebar">
<div class="sidebar-inner sidebar-nav-active sidebar-toc-active">
<ul class="sidebar-nav">
<li class="sidebar-nav-toc">
文章目录
</li>
<li class="sidebar-nav-overview">
站点概览
</li>
</ul>
<div class="sidebar-panel-container">
<!--noindex-->
<div class="post-toc-wrap sidebar-panel">
<div class="post-toc animated"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E7%94%A8-awk-%E2%80%94%E2%80%94-%E6%AF%94%E8%BE%83-awk%E3%80%81sed-%E5%92%8C-cut"><span class="nav-number">1.</span> <span class="nav-text">为什么要用 awk —— 比较 awk、sed 和 cut</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E4%BD%BF%E7%94%A8-cut"><span class="nav-number">2.</span> <span class="nav-text">使用 cut</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#awk-%E5%91%BD%E4%BB%A4%E5%8F%82%E6%95%B0"><span class="nav-number">3.</span> <span class="nav-text">awk 命令参数</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#awk-%E8%AF%AD%E6%B3%95"><span class="nav-number">4.</span> <span class="nav-text">awk 语法</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%86%85%E7%BD%AE%E5%8F%98%E9%87%8F"><span class="nav-number">4.1.</span> <span class="nav-text">内置变量</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%86%85%E5%BB%BA%E5%87%BD%E6%95%B0"><span class="nav-number">4.2.</span> <span class="nav-text">内建函数</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%B8%80%E4%BA%9B-stupid-%E6%93%8D%E4%BD%9C"><span class="nav-number">4.3.</span> <span class="nav-text">一些 stupid 操作</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%BA%94%E7%94%A8"><span class="nav-number">5.</span> <span class="nav-text">应用</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%8E%BB%E6%8E%89%E7%A9%BA%E8%A1%8C"><span class="nav-number">5.1.</span> <span class="nav-text">去掉空行</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E8%8A%B1%E5%BC%8F%E5%88%86%E5%89%B2%E5%AD%97%E7%AC%A6%E4%B8%B2"><span class="nav-number">5.2.</span> <span class="nav-text">花式分割字符串</span></a></li><li class="nav-item 
nav-level-3"><a class="nav-link" href="#%E5%A4%8D%E6%9D%82%E6%A0%BC%E5%BC%8F%E5%8C%96"><span class="nav-number">5.3.</span> <span class="nav-text">复杂格式化</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E7%BB%9F%E8%AE%A1%E8%AF%B7%E6%B1%82%E9%87%8F"><span class="nav-number">5.4.</span> <span class="nav-text">统计请求量</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E7%BB%9F%E8%AE%A1%E5%BB%B6%E6%97%B6%E5%A4%A7%E4%BA%8E-100ms-%E7%9A%84%E8%AF%B7%E6%B1%82%E6%95%B0"><span class="nav-number">5.5.</span> <span class="nav-text">统计延时大于 100ms 的请求数</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E7%BB%9F%E8%AE%A1%E6%9F%90%E4%B8%AA%E6%97%B6%E9%97%B4%E6%AE%B5%E5%86%85%E6%9F%90%E4%B8%AA-userCode-%E7%9A%84%E8%AF%B7%E6%B1%82%E6%95%B0"><span class="nav-number">5.6.</span> <span class="nav-text">统计某个时间段内某个 userCode 的请求数</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E6%8C%89%E6%9F%90%E4%B8%80%E5%88%97%E5%8E%BB%E9%87%8D%E5%B9%B6%E6%B1%82%E5%92%8C"><span class="nav-number">5.7.</span> <span class="nav-text">按某一列去重并求和</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E8%BF%87%E6%BB%A4%E8%A1%8C"><span class="nav-number">5.8.</span> <span class="nav-text">过滤行</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E8%AE%A1%E7%AE%97%E6%AF%8F%E4%B8%AA%E4%BA%BA%E4%B8%80%E6%9C%88%E4%BB%BD%E5%B7%A5%E8%B5%84%E4%B9%8B%E5%92%8C"><span class="nav-number">5.9.</span> <span class="nav-text">计算每个人一月份工资之和</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%B0%86%E5%A4%A7%E6%96%87%E4%BB%B6%E6%8C%89%E8%A7%84%E5%88%99%E6%8B%86%E6%88%90%E5%B0%8F%E6%96%87%E4%BB%B6"><span class="nav-number">5.10.</span> <span class="nav-text">将大文件按规则拆成小文件</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#%E5%8F%82%E8%80%83"><span class="nav-number">6.</span> <span class="nav-text">参考</span></a><ol 
class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E8%BF%9B%E9%98%B6"><span class="nav-number">6.1.</span> <span class="nav-text">进阶</span></a></li></ol></li></ol></div>
</div>
<!--/noindex-->
<div class="site-overview-wrap sidebar-panel">
<div class="site-author animated" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name">tallate</p>
<div class="site-description" itemprop="description"></div>
</div>
<div class="site-state-wrap animated">
<nav class="site-state">
<div class="site-state-item site-state-posts">
<a href="/archives/">
<span class="site-state-item-count">192</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/">
<span class="site-state-item-count">25</span>
<span class="site-state-item-name">分类</span></a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/">
<span class="site-state-item-count">84</span>
<span class="site-state-item-name">标签</span></a>
</div>
</nav>
</div>
</div>
</div>
</div>
</aside>
</div>
<div class="main-inner post posts-expand">
<div class="post-block">
<article itemscope itemtype="http://schema.org/Article" class="post-content" lang="zh-CN">
<link itemprop="mainEntityOfPage" href="https://tallate.github.io/59c58f8f.html">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="image" content="/images/avatar.gif">
<meta itemprop="name" content="tallate">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="Tallate">
<meta itemprop="description" content="">
</span>
<span hidden itemprop="post" itemscope itemtype="http://schema.org/CreativeWork">
<meta itemprop="name" content="cut、awk 使用总结 | Tallate">
<meta itemprop="description" content="">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">
cut、awk 使用总结
</h1>
<div class="post-meta-container">
<div class="post-meta">
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建时间:2019-07-20 20:34:48" itemprop="dateCreated datePublished" datetime="2019-07-20T20:34:48+08:00">2019-07-20</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-calendar-check"></i>
</span>
<span class="post-meta-item-text">更新于</span>
<time title="修改时间:2025-07-07 01:56:20" itemprop="dateModified" datetime="2025-07-07T01:56:20+08:00">2025-07-07</time>
</span>
<span class="post-meta-item">
<span class="post-meta-item-icon">
<i class="far fa-folder"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/Linux/" itemprop="url" rel="index"><span itemprop="name">Linux</span></a>
</span>
</span>
</div>
</div>
</header>
<div class="post-body" itemprop="articleBody"><p>在 awk、sed、cut 三个命令中,awk 是功能最强大的,基本能实现所有字符串操作,平时常用于较复杂的日志分析,不过比起别的命令来也会相对复杂一点。</p>
<span id="more"></span>
<h2 id="为什么要用-awk-——-比较-awk、sed-和-cut"><a href="#为什么要用-awk-——-比较-awk、sed-和-cut" class="headerlink" title="为什么要用 awk —— 比较 awk、sed 和 cut"></a>为什么要用 awk —— 比较 awk、sed 和 cut</h2><p>sed 是流编辑器,它逐行取出文本内容再进行处理,和我们平时使用的交互式文本编辑器差不多,比如插入新行、修改某行、根据正则匹配某行的同时执行修改。<br>awk 是报表生成工具,同样是逐行取出文件,但是取出的目的是对内容进行二次加工,然后将有用的数据单独格式化输出、或进行归纳统计得到统计结果等。<br>比如,统计日志中某 IP 出现的次数,awk 很方便做到但是 sed 就很困难;再如,要在文本某一行之前插入一行,sed 很容易做到但是 awk 就比较麻烦。<br>cut 有点像一个简化版的 awk,专门用来分割文本并取需要的列进行显示,如果只用到这个功能,使用 cut 写起来会更简练方便。</p>
<h2 id="使用-cut"><a href="#使用-cut" class="headerlink" title="使用 cut"></a>使用 cut</h2><p>cut 命令用于按“列”来提取文本字符,格式为:<code>cut [参数] 文本</code>。</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"># 以:作为分隔符,且仅看第一列</span><br><span class="line">cut -d: -f1 /etc/passwd</span><br></pre></td></tr></table></figure>
<h2 id="awk-命令参数"><a href="#awk-命令参数" class="headerlink" title="awk 命令参数"></a>awk 命令参数</h2><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">$ awk '{print $0}' test.txt</span><br><span class="line"># 读取脚本文件</span><br><span class="line">$ awk -f test.awk test.txt</span><br></pre></td></tr></table></figure>
<h2 id="awk-语法"><a href="#awk-语法" class="headerlink" title="awk 语法"></a>awk 语法</h2><ul>
<li>基本用法<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">$ awk 动作 文件名</span><br><span class="line">例子:</span><br><span class="line">$ awk '{print $0}' test.txt</span><br></pre></td></tr></table></figure>
动作一般会用<code>{</code>和<code>}</code>包起来,awk 会读取文件中的每一行,然后对每一行执行一次动作</li>
<li>转义<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">$ awk '{match($0, "\\[(.+)\\]", a); print a[1];}' test.txt</span><br><span class="line">$ awk '{ if($0 ~ "^\\[TraceId.*") print $0 }' catalina.out</span><br></pre></td></tr></table></figure>
注意这里使用到了一对反斜杠<code>\\</code>,是因为 awk 本身是一个语言,它会先有一个编译的过程,然后再用正则表达式引擎去解释,如果是<code>\.</code>,则会在第一步将其解释为<code>.</code>,在第二步时就会匹配任意字符</li>
<li>条件<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><span class="line">$ awk '条件 动作' 文件名</span><br><span class="line">或者</span><br><span class="line">$ awk '{if (条件) 动作}' 文件名</span><br><span class="line">$ awk '{if (条件) 动作1; else 动作2}' 文件名</span><br><span class="line">例子:</span><br><span class="line">$ awk -F ':' '/usr/ {print $1}' test.txt</span><br><span class="line">$ awk -F ':' '{if ($1 > "m") print $1; else print "---"}' test.txt</span><br></pre></td></tr></table></figure></li>
<li>预处理和结束处理<br>有时候需要在命令执行前初始化一些全局变量,或者需要在文件的全部行遍历完毕后输出一些结果,可以考虑使用<code>BEGIN</code>、<code>END</code>语句块<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">$ awk 'BEGIN{a=0}{a+=1; print NR}END{print "行数=", a}' test.txt</span><br></pre></td></tr></table></figure></li>
</ul>
<h3 id="内置变量"><a href="#内置变量" class="headerlink" title="内置变量"></a>内置变量</h3><ul>
<li>NF:NF 表示目前的记录被分割的字段的数目,NF 可以理解为 Number of Field。</li>
<li>NR:NR 表示从 awk 开始执行后,按照记录分隔符读取的数据次数,默认的记录分隔符为换行符,因此默认的就是读取的数据行数,NR 可以理解为 Number of Record 的缩写。</li>
<li>FNR:在 awk 处理多个输入文件的时候,在处理完第一个文件后,NR 并不会从 1 开始,而是继续累加,因此就出现了 FNR,每当处理一个新文件的时候,FNR 就从 1 开始计数,FNR 可以理解为 File Number of Record。 <figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"># 从第二个文件中排除掉第一个文件中已出现的数据</span><br><span class="line"># https://blog.csdn.net/stanjiang2010/article/details/6184458</span><br><span class="line">awk -F ':' 'NR==FNR{a[$1]=1;}NR>FNR{if(a[$1]!=1){print $0;}}' a.txt b.txt</span><br></pre></td></tr></table></figure></li>
<li>FILENAME:当前文件名</li>
<li>FS:字段分隔符,默认是空格和制表符。</li>
<li>RS:行分隔符,用于分割每一行,默认是换行符。</li>
<li>OFS:输出字段的分隔符,用于打印时分隔字段,默认为空格。</li>
<li>ORS:输出记录的分隔符,用于打印时分隔记录,默认为换行符。</li>
<li>OFMT:数字输出的格式,默认为%.6g。</li>
</ul>
<h3 id="内建函数"><a href="#内建函数" class="headerlink" title="内建函数"></a>内建函数</h3><ul>
<li><p>toupper():字符转为大写</p>
</li>
<li><p>tolower():字符转为小写。</p>
</li>
<li><p>sin():正弦。</p>
</li>
<li><p>cos():余弦。</p>
</li>
<li><p>sqrt():平方根。</p>
</li>
<li><p>rand():随机数。</p>
</li>
<li><p>length<br>length 函数返回整个记录中的字符数。</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">echo "123" | awk '{print length}'</span><br><span class="line">echo "123 4567" | awk -F ' ' '{print length($2)}'</span><br></pre></td></tr></table></figure></li>
<li><p>substr<br>返回从起始位置起,指定长度之子字符串;若未指定长度,则返回从起始位置到字符串末尾的子字符串。<br>格式:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">substr(s,p) # 返回字符串s中从p开始的后缀部分</span><br><span class="line">substr(s,p,n) # 返回字符串s中从p开始长度为n的子串</span><br></pre></td></tr></table></figure></li>
<li><p>split<br>split 允许把一个字符串分割为单词存储在数组中,可以自己定义域分隔符,或者使用现在 FS(域分隔符)的值。<br>格式:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">split(string, array, field separator)</span><br><span class="line">split(string, array) # 如果第三个参数没有提供,awk就默认使用当前FS值</span><br></pre></td></tr></table></figure>
<p>示例:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">awk -F ',' '{print substr($3,6)}' ---> 表示是从第3个字段里的第6个字符开始,一直到设定的分隔符","结束.</span><br><span class="line">substr($3,10,8) ---> 表示是从第3个字段里的第10个字符开始,截取8个字符结束.</span><br><span class="line">substr($3,6) ---> 表示是从第3个字段里的第6个字符开始,一直到结尾</span><br></pre></td></tr></table></figure></li>
<li><p>gsub<br>gsub 函数则使得在所有正则表达式被匹配的时候都发生替换。<br>格式:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">gsub(regular expression, substitution string, target string);</span><br><span class="line">简称 gsub(r,s,t)。</span><br></pre></td></tr></table></figure>
<p>示例:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"># 把一个文件里面所有包含 abc 的行里面的 abc 替换成 defg,然后输出第一列和第三列</span><br><span class="line"># ~是包含,!~是不包含</span><br><span class="line">awk '$0 ~ /abc/ {gsub("abc", "defg", $0); print $1, $3}' abc.txt</span><br></pre></td></tr></table></figure>
</li>
<li><p>match<br>match 匹配给定字符串并提取部分内容,并将第一次匹配的位置下标和子串长度分别赋给内置变量 RSTART 和 RLENGTH。<br>格式:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">match(string, regexp [, array])</span><br></pre></td></tr></table></figure>
<p>在正则表达式中,可以使用’()’将要提取的成分标出,然后就可以在结果数组里面取到了。<br>示例:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">echo foooobazbarrrrr | awk '{ match($0, /(fo+).+(bar*)/, arr); print arr[1], arr[2];}'</span><br></pre></td></tr></table></figure>
<p>经试验,awk 的正则表达式似乎不支持<strong>惰性匹配</strong>:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">$ echo '123,123,123' | awk '{match($0, "(.*?),", a); print a[1];}'</span><br><span class="line"></span><br><span class="line">123,123</span><br></pre></td></tr></table></figure>
<p>理想的输出结果是”123”,但是结果却输出了”123,123”,这是<strong>贪婪匹配</strong>的结果。<br>为了让 awk 能正确匹配,需要关注内容的格式,真实环境中像”123,123,123”这样重复的是比较少的,比如下面的脚本利用文本内容中”1”、”2”、”3”不重复的特征,匹配中第一对”AB”、”BA”中间的序列:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">BEGIN {</span><br><span class="line"> str="AB1BA2AB3BA"</span><br><span class="line"> regex="AB([^AB])*BA"</span><br><span class="line"> if (match(str,regex))</span><br><span class="line"> print substr(str,RSTART,RLENGTH)</span><br><span class="line">}</span><br><span class="line"></span><br><span class="line">AB1BA</span><br></pre></td></tr></table></figure>
</li>
<li><p>system<br>awk 可以调用 shell 的其他命令,不过拿不到命令的输出内容,只能得到命令的退出状态码:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk '{system("echo "$0)}' result.csv</span><br></pre></td></tr></table></figure></li>
</ul>
<h3 id="一些-stupid-操作"><a href="#一些-stupid-操作" class="headerlink" title="一些 stupid 操作"></a>一些 stupid 操作</h3><ul>
<li>在遍历每一行时维持一个全局变量<br>全局变量不管在哪个语言内都需要谨慎对待。<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk '{a+=$(NF-2); print "Total so far:", a}' logs.txt</span><br></pre></td></tr></table></figure></li>
</ul>
<h2 id="应用"><a href="#应用" class="headerlink" title="应用"></a>应用</h2><h3 id="去掉空行"><a href="#去掉空行" class="headerlink" title="去掉空行"></a>去掉空行</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk '{if($0 != "") print $0;}' test.txt</span><br></pre></td></tr></table></figure>
<h3 id="花式分割字符串"><a href="#花式分割字符串" class="headerlink" title="花式分割字符串"></a>花式分割字符串</h3><p>awk 命令默认使用空格分割字符串,引用这些分割后的字符串可以使用$1、$2、$3…引用,也可以使用-F 选项设置分隔符:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk -F ':' '{print $2}' test.txt</span><br></pre></td></tr></table></figure>
<p>也可以在<code>BEGIN</code>块里修改<code>FS</code>变量:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk 'BEGIN{FS=":"}{print $1}' test.txt</span><br></pre></td></tr></table></figure>
<h3 id="复杂格式化"><a href="#复杂格式化" class="headerlink" title="复杂格式化"></a>复杂格式化</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk -F ' ' '{print "insert into obfuscate_dictionary (dic_key, code) values('\''"$2"'\'', '\''"$3"'\'');"}' dics.txt > result.txt</span><br></pre></td></tr></table></figure>
<h3 id="统计请求量"><a href="#统计请求量" class="headerlink" title="统计请求量"></a>统计请求量</h3><ul>
<li>统计请求日志中某个时间段内每秒的请求量:<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"># 内容格式:QTrace[链路标识] 19:01:12.123 content[日志内容]</span><br><span class="line">sudo grep "19:0" request.log | awk -F ' ' '{print $2}' | awk -F. '{print$1}' | sort | awk -F: '{a[$1":"$2":"$3]++}END{for(i in a){split(i,t);print t[1]":"t[2]":"t[3]"访问"a[i]"次"}}' > result.txt</span><br></pre></td></tr></table></figure></li>
<li>统计 request.log 所有 url 请求数,按照次数降序排列<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"># 请求日志request.log每一行的内容为"xxxx uri=xxxx, xxxx"</span><br><span class="line"># [^,]表示匹配一个除','之外的字符</span><br><span class="line"># substr($0, RSTART, RLENGTH)提取子串起始下标为RSTART,长度为RLENGTH,这里+4和-5是为了去掉结果中的"uri"前缀和","后缀,如果有必要,还可以接上`| awk '{ sub("^ *", ""); sub(" *$", ""); print }'`这样的命令来去掉开头和末尾的空格</span><br><span class="line"># uniq可以用于去重,但是当重复的行不相邻时,uniq命令是不起作用的,所以要先用sort进行排序</span><br><span class="line"># 最后使用sort排序输出含有比较多的选项,-r: 逆序,-n: 当做数字进行排序,-k 1: 按第一列排序</span><br><span class="line">$ awk '{ match($0, "uri[^,]*,"); print substr($0, RSTART + 4, RLENGTH - 5) }' request.log | sort | uniq -c | sort -r -n -k 1</span><br></pre></td></tr></table></figure></li>
<li>统计每 10 秒的请求量<br>原理其实都是差不多的,剩下的就是格式化了。<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"># 假设格式为"[2019-09-26 11:11:11.111] code=123,xxxxx"</span><br><span class="line">grep "过滤条件" request.log | grep "H114801" | awk '{match($0, "\\[....-..-.. (..:..:.).\\....\\].*code=(.*),.*", a); b[a[1]"-"a[2]]++}END{for(i in b){split(i, t, "-"); print "时间:"t[1]" 编码:"t[2]" 请求量:"b[i]}}'</span><br></pre></td></tr></table></figure></li>
</ul>
<h3 id="统计延时大于-100ms-的请求数"><a href="#统计延时大于-100ms-的请求数" class="headerlink" title="统计延时大于 100ms 的请求数"></a>统计延时大于 100ms 的请求数</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"># 示例请求日志格式为:request:...; response:...; cost:[123 ms]</span><br><span class="line">grep "过滤请求" request.log | awk '{match($0, "cost:\\[(.+) ms\\]", a); if(a[1] >= 100) print a[1];}' | wc -l</span><br></pre></td></tr></table></figure>
<h3 id="统计某个时间段内某个-userCode-的请求数"><a href="#统计某个时间段内某个-userCode-的请求数" class="headerlink" title="统计某个时间段内某个 userCode 的请求数"></a>统计某个时间段内某个 userCode 的请求数</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line"># 内容格式:QTrace[链路标识] 2019-08-15 19:01:12.123 INFO userCode=abc123, request=..., response=...</span><br><span class="line">grep "过滤信息" request.log | awk '{match($0, "2019-08-15 (..):(..):(..).+ INFO.+userCode=(.+), req", a); if(a[2]>=20&&a[2]<35) print a[4];}' | sort | uniq -c</span><br></pre></td></tr></table></figure>
<h3 id="按某一列去重并求和"><a href="#按某一列去重并求和" class="headerlink" title="按某一列去重并求和"></a>按某一列去重并求和</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"># 内容格式:</span><br><span class="line">2 小航分数</span><br><span class="line">1 小绿分数</span><br><span class="line">5 小航分数</span><br><span class="line"># 按其中第二列去重,并将第一列的分数加起来,结果使用','分割是为了方便在csv中显示</span><br><span class="line">awk '{a[$2]=a[$2]+$1;}END{for(i in a){print i, ",", a[i];}}' 0820-0835期间.txt</span><br></pre></td></tr></table></figure>
<h3 id="过滤行"><a href="#过滤行" class="headerlink" title="过滤行"></a>过滤行</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"># 内容格式:1 3</span><br><span class="line"># NF表示当前行有多少个字段,因此$NF可以表示最后一个字段,$(NF-1)就表示倒数第二个字段,以此类推...</span><br><span class="line">awk -F ' ' 'NF>2{print NF}' test.txt</span><br><span class="line"># 打印奇数行,并且匹配某个正则表达式,NR表示当前处理的是第几行</span><br><span class="line">awk 'NR%2==1 && /你好/ {print NR ") " $1}' test.txt</span><br></pre></td></tr></table></figure>
<h3 id="计算每个人一月份工资之和"><a href="#计算每个人一月份工资之和" class="headerlink" title="计算每个人一月份工资之和"></a>计算每个人一月份工资之和</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"># 内容格式:</span><br><span class="line"># John 2013-01-13 bike 41000</span><br><span class="line"># vivi 2013-01-18 car 42800</span><br><span class="line"># Tom 2013-01-20 car 32500</span><br><span class="line"># John 2013-01-28 bike 63500</span><br><span class="line">awk '{split($2,a,"-");if(a[2]==01){b[$1]+=$4}}END{for(i in b)print i,b[i]}' test.txt</span><br></pre></td></tr></table></figure>
<h3 id="将大文件按规则拆成小文件"><a href="#将大文件按规则拆成小文件" class="headerlink" title="将大文件按规则拆成小文件"></a>将大文件按规则拆成小文件</h3><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">awk -F ',' '{print $3 > ($2".txt")}' input.csv</span><br></pre></td></tr></table></figure>
<p>输入内容格式:</p>
<figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">1,Mike,50$</span><br><span class="line">2,Mike,100$</span><br><span class="line">3,Joe,175$</span><br></pre></td></tr></table></figure>
<h2 id="参考"><a href="#参考" class="headerlink" title="参考"></a>参考</h2><ol>
<li><a target="_blank" rel="noopener" href="http://www.ruanyifeng.com/blog/2018/11/awk.html">awk 入门教程</a></li>
<li><a target="_blank" rel="noopener" href="https://gregable.com/2010/09/why-you-should-know-just-little-awk.html">Why you should learn just a little Awk: An Awk tutorial by Example</a></li>
<li><a target="_blank" rel="noopener" href="https://www.digitalocean.com/community/tutorials/how-to-use-the-awk-language-to-manipulate-text-in-linux">How To Use the AWK language to Manipulate Text in Linux</a></li>
<li><a target="_blank" rel="noopener" href="https://learnxinyminutes.com/docs/awk/">Learn X in Y minutes</a></li>
<li><a target="_blank" rel="noopener" href="https://likegeeks.com/awk-command/">30 Examples For Awk Command In Text Processing</a></li>
</ol>
<h3 id="进阶"><a href="#进阶" class="headerlink" title="进阶"></a>进阶</h3><ol>
<li><a target="_blank" rel="noopener" href="https://github.com/wuzhouhui/awk">《AWK 程序设计语言》中文翻译</a></li>
<li><a target="_blank" rel="noopener" href="https://www.cnblogs.com/DengGao/p/5935730.html">shell-正则表达式</a><br>正则表达式分为三种:BREs、EREs、PREs。如果发现某个文本工具或语言中的正则语法和自己理解的不同,有可能是因为它们实现的正则标准不同。</li>
<li><a target="_blank" rel="noopener" href="https://www.cnblogs.com/DengGao/p/5935719.html">awk-模式匹配</a></li>
<li><a target="_blank" rel="noopener" href="https://www.gnu.org/software/gawk/manual/html_node/index.html#SEC_Contents">The GNU Awk User’s Guide</a><br><a target="_blank" rel="noopener" href="https://www.gnu.org/software/gawk/manual/gawk.html#String-Functions">String Functions</a></li>
</ol>
<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/kity@2.0.4/dist/kity.min.js"></script><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/kityminder-core@1.4.50/dist/kityminder.core.min.js"></script><script defer type="text/javascript" src="https://cdn.jsdelivr.net/npm/hexo-simple-mindmap@0.6.0/dist/mindmap.min.js"></script><link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/hexo-simple-mindmap@0.6.0/dist/mindmap.min.css">
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/Linux/" rel="tag"># Linux</a>
<a href="/tags/%E8%84%9A%E6%9C%AC/" rel="tag"># 脚本</a>
</div>
<div class="post-nav">
<div class="post-nav-item">
<a href="/cc86b541.html" rel="prev" title="TCP协议总结">
<i class="fa fa-angle-left"></i> TCP协议总结
</a>
</div>
<div class="post-nav-item">
<a href="/9acacb00.html" rel="next" title="Kubernetes入门">
Kubernetes入门 <i class="fa fa-angle-right"></i>
</a>
</div>
</div>
</footer>
</article>
</div>
</div>
</main>
<footer class="footer">
<div class="footer-inner">
<div class="copyright">
©
<span itemprop="copyrightYear">2025</span>
<span class="with-love">
<i class="fa fa-heart"></i>
</span>
<span class="author" itemprop="copyrightHolder">tallate</span>
</div>
<div class="powered-by">由 <a href="https://hexo.io/" rel="noopener" target="_blank">Hexo</a> &amp; <a href="https://theme-next.js.org/" rel="noopener" target="_blank">NexT.Gemini</a> 强力驱动
</div>
</div>
</footer>
<div class="back-to-top" role="button" aria-label="返回顶部">
<i class="fa fa-arrow-up fa-lg"></i>
<span>0%</span>
</div>
<a href="https://github.com/tallate" class="github-corner" title="在 GitHub 上关注我" aria-label="在 GitHub 上关注我" rel="noopener" target="_blank"><svg width="80" height="80" viewBox="0 0 250 250" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a>
<noscript>
<div class="noscript-warning">Theme NexT works best with JavaScript enabled</div>
</noscript>
<script src="https://cdnjs.cloudflare.com/ajax/libs/animejs/3.2.1/anime.min.js" integrity="sha256-XL2inqUJaslATFnHdJOi9GfQ60on8Wx1C2H8DYiN1xY=" crossorigin="anonymous"></script>
<script src="/js/comments.js"></script><script src="/js/utils.js"></script><script src="/js/motion.js"></script><script src="/js/next-boot.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/hexo-generator-searchdb/1.4.1/search.js" integrity="sha256-1kfA5uHPf65M5cphT2dvymhkuyHPQp5A53EGZOnOLmc=" crossorigin="anonymous"></script>
<script src="/js/third-party/search/local-search.js"></script>
<script class="next-config" data-name="mermaid" type="application/json">{"enable":true,"version":"7.1.2","options":null,"js":{"url":"https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.3.0/mermaid.min.js","integrity":"sha256-9y71g5Lz/KLsHjB8uXwnkuWDtAMDSzD/HdIbqhJfTAI="}}</script>
<script src="/js/third-party/tags/mermaid.js"></script>
</body>
</html>