Skip to content

Commit b5c255c

Browse files
committed
增加es埋点,优化es搜索
1 parent 914ac04 commit b5c255c

File tree

7 files changed

+175
-145
lines changed

7 files changed

+175
-145
lines changed

DjangoBlog/elasticsearch_backend.py

+21-98
Original file line numberDiff line numberDiff line change
@@ -10,72 +10,55 @@
1010
@file: elasticsearch_backend.py
1111
@time: 2019-04-13 11:46
1212
"""
13+
1314
import logging
1415
import re
15-
import json
16-
17-
from datetime import datetime, timedelta
18-
19-
from django.conf import settings
20-
from django.core.exceptions import ImproperlyConfigured
21-
from django.utils import six
22-
from django.utils.datetime_safe import datetime
2316
from django.utils.encoding import force_text
2417

2518
from elasticsearch_dsl import Q
2619

2720
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
28-
from haystack.constants import DJANGO_CT, DJANGO_ID, ID
29-
from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
30-
from haystack.inputs import Clean, Exact, PythonData, Raw
3121
from haystack.models import SearchResult
3222
from haystack.utils import log as logging
33-
from haystack.utils import get_identifier, get_model_ct
34-
from haystack.utils.app_loading import haystack_get_model
35-
from django_elasticsearch_dsl.registries import registry
3623

3724
from blog.models import Article
38-
from blog.documents import ArticleDocument
25+
from blog.documents import ArticleDocument, ArticleDocumentManager
3926

4027
logger = logging.getLogger(__name__)
4128

42-
DATETIME_REGEX = re.compile(
43-
'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
44-
4529

4630
class ElasticSearchBackend(BaseSearchBackend):
31+
def __init__(self, connection_alias, **connection_options):
32+
super(ElasticSearchBackend, self).__init__(connection_alias, **connection_options)
33+
self.manager = ArticleDocumentManager()
34+
self._rebuild(None)
4735

48-
def _get_models(self):
49-
models = registry.get_models()
50-
return set(models)
36+
def _get_models(self, iterable):
37+
models = iterable if iterable else Article.objects.all()
38+
docs = self.manager.convert_to_doc(models)
39+
return docs
5140

5241
def _create(self, models):
53-
for index in registry.get_indices(models):
54-
index.create()
55-
56-
def _populate(self, models):
57-
for doc in registry.get_documents(models):
58-
qs = doc().get_queryset()
59-
doc().update(qs)
42+
self.manager.create_index()
43+
docs = self._get_models(models)
44+
self.manager.rebuild(docs)
6045

6146
def _delete(self, models):
62-
for index in registry.get_indices(models):
63-
index.delete(ignore=404)
47+
for m in models:
48+
m.delete()
6449
return True
6550

6651
def _rebuild(self, models):
67-
if not self._delete(models):
68-
return
69-
70-
self._create(models)
71-
self._populate(models)
52+
models = models if models else Article.objects.all()
53+
docs = self.manager.convert_to_doc(models)
54+
self.manager.update_docs(docs)
7255

7356
def update(self, index, iterable, commit=True):
74-
models = self._get_models()
75-
# self._rebuild(models)
57+
models = self._get_models(iterable)
58+
self.manager.update_docs(models)
7659

7760
def remove(self, obj_or_string):
78-
models = self._get_models()
61+
models = self._get_models([obj_or_string])
7962
self._delete(models)
8063

8164
def clear(self, models=None, commit=True):
@@ -124,66 +107,6 @@ def search(self, query_string, **kwargs):
124107
'spelling_suggestion': spelling_suggestion,
125108
}
126109

127-
def _from_python(self, value):
128-
"""
129-
Converts Python values to a string for Whoosh.
130-
131-
Code courtesy of pysolr.
132-
"""
133-
if hasattr(value, 'strftime'):
134-
if not hasattr(value, 'hour'):
135-
value = datetime(value.year, value.month, value.day, 0, 0, 0)
136-
elif isinstance(value, bool):
137-
if value:
138-
value = 'true'
139-
else:
140-
value = 'false'
141-
elif isinstance(value, (list, tuple)):
142-
value = u','.join([force_text(v) for v in value])
143-
elif isinstance(value, (six.integer_types, float)):
144-
# Leave it alone.
145-
pass
146-
else:
147-
value = force_text(value)
148-
return value
149-
150-
def _to_python(self, value):
151-
"""
152-
Converts values from Whoosh to native Python values.
153-
154-
A port of the same method in pysolr, as they deal with data the same way.
155-
"""
156-
if value == 'true':
157-
return True
158-
elif value == 'false':
159-
return False
160-
161-
if value and isinstance(value, six.string_types):
162-
possible_datetime = DATETIME_REGEX.search(value)
163-
164-
if possible_datetime:
165-
date_values = possible_datetime.groupdict()
166-
167-
for dk, dv in date_values.items():
168-
date_values[dk] = int(dv)
169-
170-
return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'],
171-
date_values['minute'], date_values['second'])
172-
173-
try:
174-
# Attempt to use json to load the values.
175-
converted_value = json.loads(value)
176-
177-
# Try to handle most built-in types.
178-
if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
179-
return converted_value
180-
except:
181-
# If it fails (SyntaxError or its ilk) or we don't trust it,
182-
# continue on.
183-
pass
184-
185-
return value
186-
187110

188111
class ElasticSearchQuery(BaseSearchQuery):
189112
def _convert_datetime(self, date):

blog/documents.py

+110-42
Original file line numberDiff line numberDiff line change
@@ -10,54 +10,122 @@
1010
@file: documents.py
1111
@time: 2019-04-05 13:05
1212
"""
13-
14-
from django_elasticsearch_dsl import DocType, Index, fields
13+
import time
1514
from blog.models import Article, Category, Tag
16-
from accounts.models import BlogUser
17-
18-
blog = Index('blog')
19-
blog.settings(
20-
number_of_shards=1,
21-
number_of_replicas=0
22-
)
23-
24-
25-
@blog.doc_type
26-
class ArticleDocument(DocType):
27-
body = fields.TextField(attr='body_to_string', analyzer='ik_max_word')
28-
title = fields.TextField(analyzer='ik_max_word')
29-
author = fields.ObjectField(properties={
30-
'nickname': fields.TextField(analyzer='ik_max_word'),
31-
'id': fields.IntegerField()
15+
from elasticsearch_dsl import Document, Date, Integer, Keyword, Text, Object, Boolean
16+
17+
from django.conf import settings
18+
19+
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')
20+
21+
from elasticsearch_dsl.connections import connections
22+
23+
if ELASTICSEARCH_ENABLED:
24+
connections.create_connection(hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
25+
26+
27+
class ElapsedTimeDocument(Document):
    """Elasticsearch document describing one timing record.

    Records live in the ``performance`` index. Each one carries the URL,
    the elapsed time, the moment it was logged and a type label.
    """

    # Mapped fields; only ``type`` is analysed with the ik_max_word analyzer.
    url = Text()
    time_taken = Integer()
    log_datetime = Date()
    type = Text(analyzer='ik_max_word')

    class Index:
        # Index name plus a single-shard, zero-replica layout.
        name = 'performance'
        settings = dict(number_of_shards=1, number_of_replicas=0)

    class Meta:
        doc_type = 'ElapsedTime'
42+
43+
44+
class ElaspedTimeDocumentManager():
    """Write timing records to the index backing ``ElapsedTimeDocument``.

    The class name keeps its original spelling ("Elasped") because other
    modules import it under that name.
    """

    # Set to True once ElapsedTimeDocument.init() has run in this process,
    # so the index/mapping setup is attempted only on the first record.
    mapping_created = False

    @staticmethod
    def create(url, time_taken, log_datetime, type):
        """Persist one timing record.

        :param url: value stored in the document's ``url`` field.
        :param time_taken: value stored in the ``time_taken`` field.
        :param log_datetime: value stored in the ``log_datetime`` field.
        :param type: value stored in the ``type`` field (the parameter name
            shadows the builtin but is kept for keyword-argument callers).
        """
        if not ElaspedTimeDocumentManager.mapping_created:
            ElapsedTimeDocument.init()
            ElaspedTimeDocumentManager.mapping_created = True

        # No explicit id: a millisecond-timestamp id collides when two
        # requests finish in the same millisecond, and the later save()
        # silently overwrites the earlier record. Letting Elasticsearch
        # generate the id keeps every record.
        doc = ElapsedTimeDocument(
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            type=type,
        )
        doc.save()
54+
55+
56+
class ArticleDocument(Document):
57+
body = Text(analyzer='ik_max_word')
58+
title = Text(analyzer='ik_max_word')
59+
author = Object(properties={
60+
'nickname': Text(analyzer='ik_max_word'),
61+
'id': Integer()
3262
})
33-
category = fields.ObjectField(properties={
34-
'name': fields.TextField(analyzer='ik_max_word'),
35-
'id': fields.IntegerField()
63+
category = Object(properties={
64+
'name': Text(analyzer='ik_max_word'),
65+
'id': Integer()
3666
})
37-
tags = fields.ObjectField(properties={
38-
'name': fields.TextField(analyzer='ik_max_word'),
39-
'id': fields.IntegerField()
67+
tags = Object(properties={
68+
'name': Text(analyzer='ik_max_word'),
69+
'id': Integer()
4070
})
4171

42-
# def get_instances_from_related(self, related_instance):
43-
# if isinstance(related_instance, BlogUser):
44-
# return related_instance
45-
# elif isinstance(related_instance, Category):
46-
# pass
72+
pub_time = Date()
73+
status = Text()
74+
comment_status = Text()
75+
type = Text()
76+
views = Integer()
77+
article_order = Integer()
78+
79+
class Index:
80+
name = 'blog'
81+
settings = {
82+
"number_of_shards": 1,
83+
"number_of_replicas": 0
84+
}
4785

4886
class Meta:
49-
model = Article
50-
fields = [
51-
'pub_time',
52-
'status',
53-
'comment_status',
54-
'type',
55-
'views',
56-
'article_order',
57-
58-
]
59-
# related_models = [Category, Tag, BlogUser]
6087
doc_type = 'Article'
61-
auto_refresh = False
62-
ignore_signals = True
6388

89+
90+
class ArticleDocumentManager():
    """Create, drop and fill the Elasticsearch index behind ``ArticleDocument``."""

    def __init__(self):
        # Ensure the index and its mapping exist before documents are saved.
        self.create_index()

    def create_index(self):
        """Initialise the index and mapping declared by ``ArticleDocument``."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the article index, ignoring 400/404 responses.

        Uses the index object attached to ``ArticleDocument`` so the call goes
        through the configured elasticsearch_dsl connection and the declared
        index name, rather than a fresh client on the default local host.
        """
        ArticleDocument._index.delete(ignore=[400, 404])

    # Backward-compatible alias: the method was first published under this
    # misspelled name and existing callers still use it.
    deleate_index = delete_index

    def convert_to_doc(self, articles):
        """Build one ``ArticleDocument`` per article.

        :param articles: iterable of ``Article`` instances.
        :return: list of unsaved ``ArticleDocument`` objects whose document id
            is the article's primary key.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                # Keys must match the ``author`` mapping on ArticleDocument
                # ('nickname', 'id'); otherwise the data is stored in an
                # unmapped field.
                # NOTE(review): the value is still the author's username, as
                # before -- confirm whether a nickname attribute should be used.
                author={
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id,
                },
                tags=[{'name': tag.name, 'id': tag.id} for tag in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            for article in articles
        ]

    def rebuild(self, articles=None):
        """Index the given articles, or every article when none are given.

        :param articles: iterable of ``Article`` instances; a falsy value
            (``None`` or empty) means ``Article.objects.all()``.
        """
        articles = articles if articles else Article.objects.all()
        self.update_docs(self.convert_to_doc(articles))

    def update_docs(self, docs):
        """Save each already-built document.

        :param docs: iterable of ``ArticleDocument`` instances.
        """
        for doc in docs:
            doc.save()
+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
"""
4+
@version: ??
5+
@author: liangliangyy
6+
@license: MIT Licence
7+
8+
@site: https://www.lylinux.net/
9+
@software: PyCharm
10+
@file: build_index.py
11+
@time: 2019-04-20 20:39
12+
"""
13+
14+
from blog.documents import ArticleDocument, ArticleDocumentManager
15+
16+
from django.core.management.base import BaseCommand
17+
from blog.models import Article
18+
19+
20+
# TODO: parameterize this command
21+
class Command(BaseCommand):
    """Management command that rebuilds the article search index from scratch."""

    help = 'build search index'

    def handle(self, *args, **options):
        """Drop the article index, recreate its mapping, then index all articles."""
        manager = ArticleDocumentManager()
        manager.deleate_index()
        # Recreate the index and mapping before saving documents. Without this
        # the first save() would let Elasticsearch auto-create the index with
        # dynamically inferred mappings, losing the declared analyzers and
        # index settings.
        manager.create_index()
        manager.rebuild()

blog/middleware.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212
@file: middleware.py
1313
@time: 2017/1/19 上午12:36
1414
"""
15-
15+
import datetime
1616
import time
1717
from ipware.ip import get_real_ip
1818
from DjangoBlog.utils import cache
19+
from blog.documents import ELASTICSEARCH_ENABLED, ElaspedTimeDocumentManager
1920

2021

2122
class OnlineMiddleware(object):
@@ -31,5 +32,12 @@ def __call__(self, request):
3132
return response
3233

3334
cast_time = time.time() - start_time
35+
if ELASTICSEARCH_ENABLED:
36+
time_taken = round((cast_time) * 1000, 2)
37+
url = request.path
38+
from django.utils import timezone
39+
40+
ElaspedTimeDocumentManager.create(url=url, time_taken=time_taken, log_datetime=timezone.now(),
41+
type='blog')
3442
response.content = response.content.replace(b'<!!LOAD_TIMES!!>', str.encode(str(cast_time)[:5]))
3543
return response

0 commit comments

Comments
 (0)