Skip to content

Commit 928abc8

Browse files
committed
搜索支持es
1 parent e3d3f93 commit 928abc8

File tree

4 files changed

+340
-9
lines changed

4 files changed

+340
-9
lines changed

DjangoBlog/elasticsearch_backend.py

+269
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
"""
4+
@version: ??
5+
@author: liangliangyy
6+
@license: MIT Licence
7+
8+
@site: https://www.lylinux.net/
9+
@software: PyCharm
10+
@file: elasticsearch_backend.py
11+
@time: 2019-04-13 11:46
12+
"""
13+
import logging
14+
import re
15+
import json
16+
17+
from datetime import datetime, timedelta
18+
19+
from django.conf import settings
20+
from django.core.exceptions import ImproperlyConfigured
21+
from django.utils import six
22+
from django.utils.datetime_safe import datetime
23+
from django.utils.encoding import force_text
24+
25+
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
26+
from haystack.constants import DJANGO_CT, DJANGO_ID, ID
27+
from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
28+
from haystack.inputs import Clean, Exact, PythonData, Raw
29+
from haystack.models import SearchResult
30+
from haystack.utils import log as logging
31+
from haystack.utils import get_identifier, get_model_ct
32+
from haystack.utils.app_loading import haystack_get_model
33+
from django_elasticsearch_dsl.registries import registry
34+
35+
from blog.models import Article
36+
from blog.documents import ArticleDocument
37+
38+
logger = logging.getLogger(__name__)

# Matches timestamps of the form ``YYYY-MM-DDTHH:MM:SS`` with an optional
# fractional part of 3-6 digits that may be followed by ``Z``.
# A raw string is used so ``\d`` and ``\.`` reach the regex engine verbatim
# instead of being parsed as (invalid) string escape sequences.
DATETIME_REGEX = re.compile(
    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
42+
43+
44+
class ElasticSearchBackend(BaseSearchBackend):
    """Haystack search backend backed by ``django_elasticsearch_dsl``.

    Index management goes through the ``django_elasticsearch_dsl`` registry
    and searching goes through ``ArticleDocument``, so only articles are
    searchable through this backend.

    NOTE(review): ``update``, ``remove`` and ``clear`` ignore their arguments
    and always act on *every* registered index (``update`` rebuilds them,
    ``remove``/``clear`` delete them). Confirm this coarse behaviour is
    acceptable for the index sizes involved.
    """

    def _get_models(self):
        """Return the set of models known to the document registry."""
        return set(registry.get_models())

    def _create(self, models):
        """Create the index of every document registered for ``models``."""
        for index in registry.get_indices(models):
            index.create()

    def _populate(self, models):
        """Index the full queryset of every document registered for ``models``."""
        for doc in registry.get_documents(models):
            document = doc()
            document.update(document.get_queryset())

    def _delete(self, models):
        """Delete the indices for ``models``.

        ``ignore=404`` is passed to the delete call so that an index that does
        not exist is not treated as an error.

        :returns: always ``True``; ``_rebuild`` uses it as a success flag.
        """
        for index in registry.get_indices(models):
            index.delete(ignore=404)
        return True

    def _rebuild(self, models):
        """Drop, re-create and re-populate the indices for ``models``."""
        if not self._delete(models):
            return

        self._create(models)
        self._populate(models)

    def update(self, index, iterable, commit=True):
        """Rebuild all registered indices.

        ``index``, ``iterable`` and ``commit`` are accepted for compatibility
        with the haystack backend interface but are not used: every call
        performs a full rebuild.
        """
        models = self._get_models()
        self._rebuild(models)

    def remove(self, obj_or_string):
        """Delete all registered indices.

        ``obj_or_string`` is not used; the whole index is dropped rather than
        a single document.
        """
        models = self._get_models()
        self._delete(models)

    def clear(self, models=None, commit=True):
        """Delete all registered indices (``models`` and ``commit`` are ignored)."""
        self.remove(None)

    @log_query
    def search(self, query_string, **kwargs):
        """Run a ``match`` query against article bodies.

        Only documents whose ``status`` is ``'p'`` and whose ``type`` is
        ``'a'`` are returned.

        :param query_string: text to match against the ``body`` field.
        :param kwargs: ``start_offset`` / ``end_offset`` are read and used to
            slice the result window; other keys are ignored.
        :returns: the dict produced by ``_process_results``.
        """
        # Lazy %-formatting: the message is only built if INFO is enabled.
        logger.info('search query_string:%s', query_string)

        start_offset = kwargs.get('start_offset')
        end_offset = kwargs.get('end_offset')

        query = ArticleDocument.search() \
            .query("match", body=query_string) \
            .filter('term', status='p') \
            .filter('term', type='a')
        raw_results = query[start_offset:end_offset].execute()

        return self._process_results(raw_results=raw_results)

    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        """Convert a raw Elasticsearch response into haystack's result dict.

        :param raw_results: response object; ``raw_results['hits']`` must
            expose ``total`` and a ``'hits'`` sequence.
        :param highlight: accepted for signature compatibility; not used.
        :param result_class: class used to wrap each hit
            (defaults to ``SearchResult``).
        :param distance_point: if truthy, stored on each result as
            ``_point_of_origin``.
        :param geo_sort: if truthy (and a distance point was given), the first
            sort value of each hit is exposed as ``_distance`` in kilometres.
        :returns: dict with ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion`` (always ``None`` here).
        """
        from haystack import connections

        results = []
        hits = raw_results['hits'].total

        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            # ES can return negative timestamps for pre-1970 data. Handle it.
            def from_timestamp(tm):
                if tm >= 0:
                    return datetime.utcfromtimestamp(tm)
                else:
                    return datetime(1970, 1, 1) + timedelta(seconds=tm)

            for facet_fieldname, facet_info in raw_results['facets'].items():
                facet_type = facet_info.get('_type', 'terms')

                if facet_type == 'terms':
                    facets['fields'][facet_fieldname] = [
                        (individual['term'], individual['count'])
                        for individual in facet_info['terms']
                    ]
                elif facet_type == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [
                        (from_timestamp(individual['time'] / 1000), individual['count'])
                        for individual in facet_info['entries']
                    ]
                elif facet_type == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        content_field = unified_index.document_field

        for raw_result in raw_results['hits']['hits']:
            # Every hit is reported as a blog Article; the document id is
            # passed through as the primary key.
            app_label = 'blog'
            model_name = 'Article'
            additional_fields = {}

            if 'highlight' in raw_result:
                additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')

            if distance_point:
                additional_fields['_point_of_origin'] = distance_point

                if geo_sort and raw_result.get('sort'):
                    from haystack.utils.geo import Distance
                    additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                else:
                    additional_fields['_distance'] = None

            result = result_class(app_label, model_name, raw_result['_id'], raw_result['_score'],
                                  **additional_fields)
            results.append(result)

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }

    def _from_python(self, value):
        """Normalise a Python value before handing it to the search engine.

        * date-like objects without an ``hour`` become midnight datetimes;
          datetimes are returned unchanged,
        * booleans become ``'true'`` / ``'false'``,
        * lists and tuples become a comma-joined string,
        * integers and floats are returned unchanged,
        * anything else is converted with ``force_text``.

        Code courtesy of pysolr.
        """
        if hasattr(value, 'strftime'):
            if not hasattr(value, 'hour'):
                value = datetime(value.year, value.month, value.day, 0, 0, 0)
        elif isinstance(value, bool):
            # Must be tested before the numeric branch: bool is an int subclass.
            if value:
                value = 'true'
            else:
                value = 'false'
        elif isinstance(value, (list, tuple)):
            value = u','.join([force_text(v) for v in value])
        elif isinstance(value, (six.integer_types, float)):
            # Leave it alone.
            pass
        else:
            value = force_text(value)
        return value

    def _to_python(self, value):
        """Convert a value coming back from the search engine to native Python.

        ``'true'`` / ``'false'`` become booleans, strings matching
        ``DATETIME_REGEX`` become ``datetime`` objects (fractional seconds are
        dropped), and JSON-decodable containers/numbers are decoded. Anything
        else is returned unchanged.

        A port of the same method in pysolr.
        """
        if value == 'true':
            return True
        elif value == 'false':
            return False

        if value and isinstance(value, six.string_types):
            possible_datetime = DATETIME_REGEX.search(value)

            if possible_datetime:
                date_values = {
                    key: int(part)
                    for key, part in possible_datetime.groupdict().items()
                }
                return datetime(date_values['year'], date_values['month'], date_values['day'],
                                date_values['hour'], date_values['minute'], date_values['second'])

        try:
            # Attempt to use json to load the values.
            converted_value = json.loads(value)
        except (TypeError, ValueError):
            # Not JSON (or not a string at all): hand the value back as-is.
            # Only decoding errors are caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            return value

        # Try to handle most built-in types.
        if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
            return converted_value

        return value
229+
230+
231+
class ElasticSearchQuery(BaseSearchQuery):
    """Haystack query class used together with ``ElasticSearchBackend``."""

    def _convert_datetime(self, date):
        """Format ``date`` as ``YYYYMMDDHHMMSS`` text.

        Objects without an ``hour`` attribute (plain dates) are rendered with
        a ``000000`` time part.
        """
        if hasattr(date, 'hour'):
            return force_text(date.strftime('%Y%m%d%H%M%S'))
        return force_text(date.strftime('%Y%m%d000000'))

    def clean(self, query_fragment):
        """Sanitise user input before it is handed to the backend.

        The fragment is split on whitespace. Words listed in the backend's
        ``RESERVED_WORDS`` are lower-cased, and words containing any of the
        backend's ``RESERVED_CHARACTERS`` are wrapped in single quotes. The
        cleaned words are re-joined with single spaces.
        """
        reserved_words = self.backend.RESERVED_WORDS
        reserved_chars = self.backend.RESERVED_CHARACTERS
        cleaned_words = []

        for word in query_fragment.split():
            if word in reserved_words:
                word = word.lower()

            if any(char in word for char in reserved_chars):
                word = "'%s'" % word

            cleaned_words.append(word)

        return ' '.join(cleaned_words)

    def build_query_fragment(self, field, filter_type, value):
        """Return the raw query text carried by ``value``.

        ``field`` and ``filter_type`` are not used: the backend always runs a
        ``match`` query on the article body. Input objects exposing
        ``query_string`` contribute that attribute; plain values (which have
        no such attribute) are converted with ``force_text`` instead of
        raising ``AttributeError``.
        """
        query_string = getattr(value, 'query_string', None)
        if query_string is None:
            return force_text(value)
        return query_string
265+
266+
267+
class ElasticSearchEngine(BaseEngine):
    """Haystack engine pairing ``ElasticSearchBackend`` with ``ElasticSearchQuery``."""

    backend = ElasticSearchBackend
    query = ElasticSearchQuery

blog/documents.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
"""
4+
@version: ??
5+
@author: liangliangyy
6+
@license: MIT Licence
7+
8+
@site: https://www.lylinux.net/
9+
@software: PyCharm
10+
@file: documents.py
11+
@time: 2019-04-05 13:05
12+
"""
13+
14+
from django_elasticsearch_dsl import DocType, Index, fields
15+
from blog.models import Article, Category, Tag
16+
from accounts.models import BlogUser
17+
18+
# The ``blog`` index is configured with a single shard and no replicas.
blog = Index('blog')
blog.settings(number_of_shards=1, number_of_replicas=0)
23+
24+
25+
@blog.doc_type
class ArticleDocument(DocType):
    """Search document for ``Article``, registered on the ``blog`` index.

    All explicitly declared text fields use the ``ik_max_word`` analyzer.
    Author, category and tags are embedded as small objects carrying a
    display name and an id.
    """

    # Article text, read from the model's ``body_to_string`` attribute.
    body = fields.TextField(attr='body_to_string', analyzer='ik_max_word')
    title = fields.TextField(analyzer='ik_max_word')

    author = fields.ObjectField(properties={
        'nickname': fields.TextField(analyzer='ik_max_word'),
        'id': fields.IntegerField(),
    })
    category = fields.ObjectField(properties={
        'name': fields.TextField(analyzer='ik_max_word'),
        'id': fields.IntegerField(),
    })
    tags = fields.ObjectField(properties={
        'name': fields.TextField(analyzer='ik_max_word'),
        'id': fields.IntegerField(),
    })

    # def get_instances_from_related(self, related_instance):
    #     if isinstance(related_instance, BlogUser):
    #         return related_instance
    #     elif isinstance(related_instance, Category):
    #         pass

    class Meta:
        model = Article
        # Model fields listed by name, in addition to the fields declared above.
        fields = [
            'pub_time',
            'status',
            'comment_status',
            'type',
            'views',
            'article_order',
        ]
        # related_models = [Category, Tag, BlogUser]
        doc_type = 'Article'

blog/models.py

+3
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class Article(BaseModel):
7979
category = models.ForeignKey('Category', verbose_name='分类', on_delete=models.CASCADE, blank=False, null=False)
8080
tags = models.ManyToManyField('Tag', verbose_name='标签集合', blank=True)
8181

82+
def body_to_string(self):
    """Return the article body unchanged.

    Referenced by name (``attr='body_to_string'``) from the ``body`` field of
    ``ArticleDocument``.
    """
    return self.body
84+
8285
def __str__(self):
8386
return self.title
8487

servermanager/tests.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,8 @@ def test_validate_comment(self):
4343
article.status = 'p'
4444
article.save()
4545
s = TextMessage([])
46-
s.content = "nicetitleccc"
46+
s.content = "nice"
4747
rsp = search(s, None)
48-
self.assertTrue(rsp != '没有找到相关文章。')
4948
rsp = category(None, None)
5049
self.assertIsNotNone(rsp)
5150
rsp = recents(None, None)
@@ -64,19 +63,19 @@ def test_validate_comment(self):
6463
s.content = 'test'
6564
msghandler = MessageHandler(s, {})
6665

67-
#msghandler.userinfo.isPasswordSet = True
68-
#msghandler.userinfo.isAdmin = True
66+
# msghandler.userinfo.isPasswordSet = True
67+
# msghandler.userinfo.isAdmin = True
6968
msghandler.handler()
7069
s.content = 'y'
7170
msghandler.handler()
72-
s.content='idcard:12321233'
71+
s.content = 'idcard:12321233'
7372
msghandler.handler()
74-
s.content='weather:上海'
73+
s.content = 'weather:上海'
7574
msghandler.handler()
76-
s.content='admin'
75+
s.content = 'admin'
7776
msghandler.handler()
78-
s.content='123'
77+
s.content = '123'
7978
msghandler.handler()
8079

8180
s.content = 'exit'
82-
msghandler.handler()
81+
msghandler.handler()

0 commit comments

Comments
 (0)