-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstep4.cpp
More file actions
92 lines (77 loc) · 1.87 KB
/
step4.cpp
File metadata and controls
92 lines (77 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
#include <algorithm>
#include "dictionary.h"
using namespace std;
// Fetches the next document name from `fin`, together with the stream offset
// at which that name begins.
//
//   fin     - open stream positioned somewhere inside the document-name list.
//   docname - caller-supplied buffer that receives the extracted token.
//   docpos  - receives fin.tellg() as measured just before the token is read.
//
// Returns false when, after SkipBlanks(fin), the next character is EOF;
// returns true once a name has been extracted.
// Throws a `const char*` (string literal) if the extraction itself fails.
//
// NOTE(review): SkipBlanks is declared outside this file; presumably it
// consumes leading whitespace -- confirm.
// NOTE(review): this function sets no field width before extracting into
// `docname`, so the token length is not bounded here; the caller's buffer
// must be large enough for the longest name in the list.
bool GetDocnamenDocpos(ifstream& fin, char *docname, int& docpos) {
    SkipBlanks(fin);

    const bool no_more_names = (fin.peek() == EOF);
    if (no_more_names) {
        return false;
    }

    // Capture the offset first so it refers to the start of the name.
    docpos = fin.tellg();

    fin >> docname;
    if (fin.fail()) {
        throw "말도안돼";
    }
    return true;
}
// Builds the index files from a term listing supplied on standard input.
//
// Input (stdin), repeated once per term:
//     <word> <numposts> <dict_freq>  followed by <numposts> pairs <docpos> <freq>
//
// Files read:
//     ir.docnames    - list of document names; every name that can be opened
//                      as a file counts towards the total number of documents.
//
// Files written:
//     ir.words       - the words separated by a space, a newline after every
//                      fifth word.
//     ir.dictionary  - one raw Dict_Term record per word (binary).
//     ir.postings    - one raw Posting record per (docpos, freq) pair (binary).
//     ir.info        - "<numdocs> <numwords> <max_idf>".
//
// Returns 0 on success. Returns 1, after printing a diagnostic to stderr, when
// a file cannot be opened or written, when no document could be opened, or
// when the term listing is truncated or malformed. Output for well-formed
// input is identical to the previous implementation.
int main()
{
    // File input/output.
    ofstream wout("ir.words");
    ofstream iout("ir.info");
    ofstream dout("ir.dictionary", ios::out | ios::binary);
    ofstream pout("ir.postings", ios::out | ios::binary);
    if (!wout || !iout || !dout || !pout)
    {
        cerr << "step4: cannot open one of the output files "
                "(ir.words, ir.info, ir.dictionary, ir.postings)\n";
        return 1;
    }

    // Determine the total number of documents.
    ifstream fin("ir.docnames");
    //ifstream fin("ir.mydocnames");
    if (!fin)
    {
        cerr << "step4: cannot open ir.docnames\n";
        return 1;
    }

    int numdocs = 0;
    char docname[MAXLEN+1]; // document name
    int docpos;
    try
    {
        while (GetDocnamenDocpos(fin, docname, docpos))
        {
            // Only documents that can actually be opened are counted.
            ifstream in(docname);
            if (in)
                numdocs++;
        }
    }
    catch (const char* msg)
    {
        // GetDocnamenDocpos reports an extraction failure by throwing a
        // string literal; surface it instead of letting the program terminate.
        cerr << "step4: failed to read a document name from ir.docnames: "
             << msg << "\n";
        return 1;
    }
    if (numdocs == 0)
    {
        // With zero documents every idf would be log(0); refuse to persist that.
        cerr << "step4: none of the documents listed in ir.docnames could be opened\n";
        return 1;
    }

    const double log_of_two = log(2.0);
    int count_word = 0;   // words written on the current line of ir.words
    int numwords = 0;
    double max_idf = 0;
    // Value-initialised so that any field not assigned below is not dumped to
    // disk with an indeterminate value.
    Dict_Term tm = Dict_Term();
    Posting pst = Posting();
    string input;

    while (cin >> input)
    {
        // Validate the term header before anything is written for this term.
        // numposts must be positive: it is the divisor of the idf computation.
        if (!(cin >> tm.numposts >> tm.dict_freq) || tm.numposts <= 0)
        {
            cerr << "step4: malformed or truncated header for term '"
                 << input << "'\n";
            return 1;
        }

        // Write the word to ir.words, remembering the offset where it starts.
        tm.wordpos = wout.tellp();
        wout << input;
        wout << " ";
        numwords++;
        count_word++;
        if (count_word == 5)
        {
            wout << "\n";
            count_word = 0;
        }

        // Compute idf = log2(numdocs / numposts) and track the maximum.
        tm.idf = log((double)numdocs / tm.numposts) / log_of_two;
        max_idf = max(tm.idf, max_idf);

        // Index (in Posting records) of this term's first posting.
        tm.poststart = pout.tellp() / sizeof(Posting);

        // Store the term's (document position, in-document frequency) pairs.
        for (int i = 0; i < tm.numposts; i++)
        {
            if (!(cin >> pst.docpos >> pst.freq))
            {
                cerr << "step4: malformed or truncated postings for term '"
                     << input << "'\n";
                return 1;
            }
            pout.write(reinterpret_cast<const char *>(&pst), sizeof(Posting));
        }
        dout.write(reinterpret_cast<const char *>(&tm), sizeof(Dict_Term));
    }

    // Store the summary information in ir.info.
    iout << numdocs << " " << numwords << " " << max_idf;

    iout.close();
    wout.close();
    dout.close();
    pout.close();
    // close() sets failbit when the final flush fails, so this also catches
    // write errors that only surface at that point (e.g. a full disk).
    if (!iout || !wout || !dout || !pout)
    {
        cerr << "step4: error while writing the index files\n";
        return 1;
    }
    return 0;
}