Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/seasearch add wiki search sup #366

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from seafevents.repo_metadata.index_worker import RepoMetadataIndexWorker
from seafevents.repo_metadata.slow_task_handler import SlowTaskHandler
from seafevents.seafevent_server.seafevent_server import SeafEventServer
from seafevents.app.config import ENABLE_METADATA_MANAGEMENT
from seafevents.app.config import ENABLE_METADATA_MANAGEMENT, ENABLE_WIKI
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个是老版wiki的配置

from seafevents.seasearch.index_task.filename_index_updater import RepoFilenameIndexUpdater
from seafevents.seasearch.index_task.wiki_index_updater import WikiIndexUpdater


class App(object):
Expand Down Expand Up @@ -42,6 +43,8 @@ def __init__(self, config, ccnet_config, seafile_config,
self._index_worker = RepoMetadataIndexWorker(config)
self._slow_task_handler = SlowTaskHandler(config)
self._repo_filename_index_updater = RepoFilenameIndexUpdater(config)
if ENABLE_WIKI:
self._wiki_index_updater = WikiIndexUpdater(config)

def serve_forever(self):
if self._fg_tasks_enabled:
Expand All @@ -66,3 +69,5 @@ def serve_forever(self):
self._index_worker.start()
self._slow_task_handler.start()
self._repo_filename_index_updater.start()
if ENABLE_WIKI:
self._wiki_index_updater.start()
1 change: 1 addition & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
METADATA_SERVER_URL = getattr(seahub_settings, 'METADATA_SERVER_URL', '')
ENABLE_METADATA_MANAGEMENT = getattr(seahub_settings, 'ENABLE_METADATA_MANAGEMENT', False)
METADATA_FILE_TYPES = getattr(seahub_settings, 'METADATA_FILE_TYPES', {})
ENABLE_WIKI = getattr(seahub_settings, 'ENABLE_WIKI', False)
except ImportError:
logger.critical("Can not import seahub settings.")
raise RuntimeError("Can not import seahub settings.")
Expand Down
1 change: 0 additions & 1 deletion repo_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,5 +142,4 @@ def get_virtual_repo_in_repos(self, repo_ids):
logger.error(e)
return self._get_virtual_repo_in_repos(repo_ids)


repo_data = RepoData()
33 changes: 33 additions & 0 deletions seafevent_server/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,36 @@ def search():
results = index_task_manager.keyword_search(query, repos, count, suffixes, search_path)

return {'results': results}, 200


@app.route('/wiki-search', methods=['POST'])
def search_wikis():
    """Handle ``POST /wiki-search``: keyword search over wiki indexes.

    The JSON request body is read for:
      * ``query`` -- search keyword; must be a non-blank string.
      * ``wikis`` -- the wikis to search; must be non-empty.
      * ``count`` -- optional result limit; falls back to 20 when it is
        missing or cannot be converted to ``int``.

    Returns a ``(body, status)`` tuple: ``{'results': ...}`` with 200 on
    success, or ``{'error_msg': ...}`` with 403 (auth token rejected) or
    400 (seasearch disabled, malformed body, invalid ``query``/``wikis``).
    """
    if not check_auth_token(request):
        return {'error_msg': 'Permission denied'}, 403

    # Check seasearch is enabled. The status code is explicit so that this
    # error is not delivered with the framework's default success status.
    if not index_task_manager.enabled:
        return {'error_msg': 'Seasearch is not enabled by seafevents.conf'}, 400

    try:
        data = json.loads(request.data)
    except Exception as e:
        logger.exception(e)
        return {'error_msg': 'Bad request.'}, 400

    # Valid JSON that is not an object (list, string, number, null) has no
    # ``.get``; reject it here instead of failing with an unhandled error.
    if not isinstance(data, dict):
        return {'error_msg': 'Bad request.'}, 400

    query = data.get('query')
    # A missing or non-string query is a client error, not a server crash.
    if not isinstance(query, str) or not query.strip():
        return {'error_msg': 'query invalid.'}, 400
    query = query.strip()

    wikis = data.get('wikis')
    if not wikis:
        return {'error_msg': 'wikis invalid.'}, 400

    try:
        count = int(data.get('count'))
    except (TypeError, ValueError, OverflowError):
        # Missing, non-numeric or infinite count: use the default page size.
        count = 20

    results = index_task_manager.wiki_search(query, wikis, count)

    return {'results': results}, 200
48 changes: 47 additions & 1 deletion seasearch/index_store/index_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from seafevents.seasearch.utils import need_index_metadata_info
from seafevents.db import init_db_session_class
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX, \
WIKI_INDEX_PREFIX
from seafevents.repo_metadata.metadata_server_api import MetadataServerAPI
from seafevents.repo_metadata.utils import METADATA_TABLE
from seafevents.utils import timestamp_to_isoformat_timestr
Expand Down Expand Up @@ -74,3 +75,48 @@ def delete_repo_filename_index(self, repo_id, repo_filename_index, repo_status_f

def keyword_search(self, query, repos, repo_filename_index, count, suffixes=None, search_path=None):
return repo_filename_index.search_files(repos, query, 0, count, suffixes, search_path)

def delete_wiki_index(self, wiki_id, wiki_index, wiki_status_index):
    """Delete the search index of a wiki, then its status documents.

    :param wiki_id: id of the wiki; appended to ``WIKI_INDEX_PREFIX`` to
        form the index name.
    :param wiki_index: object whose ``delete_index_by_index_name`` is called.
    :param wiki_status_index: object whose ``delete_documents_by_repo`` is
        called with ``wiki_id``.
    """
    # The wiki index itself goes first ...
    wiki_index.delete_index_by_index_name(WIKI_INDEX_PREFIX + wiki_id)
    # ... and only afterwards the status entries recorded for this wiki.
    wiki_status_index.delete_documents_by_repo(wiki_id)

def keyword_search(self, query, repos, repo_filename_index, count, suffixes=None, search_path=None):
    """Search file names in ``repos`` for ``query``.

    Delegates to ``repo_filename_index.search_files`` with a start of 0 and
    a size of ``count``; ``suffixes`` and ``search_path`` are passed through
    unchanged and both default to ``None``.

    :return: whatever ``repo_filename_index.search_files`` returns.
    """
    # NOTE(review): the reviewer pointed out that a keyword_search is already
    # defined above. Because the later definition is the one that stays
    # bound, it has to keep accepting ``search_path`` like the earlier one;
    # ideally one of the two definitions should be removed.
    return repo_filename_index.search_files(repos, query, 0, count, suffixes, search_path)

def wiki_search(self, query, wikis, wiki_index, count):
    """Search ``wikis`` for ``query`` through ``wiki_index``.

    :return: whatever ``wiki_index.search_wikis`` returns.
    """
    # Third positional argument is fixed at 0 -- presumably the start
    # offset, mirroring search_files; confirm against search_wikis.
    first = 0
    hits = wiki_index.search_wikis(wikis, query, first, count)
    return hits

def update_wiki_index(self, wiki_id, commit_id, wiki_index, wiki_status_index):
    """Bring the search index of one wiki up to ``commit_id``.

    Creates the index if it is missing, reads the stored status of the
    wiki, and returns early when the stored ``from_commit`` already equals
    ``commit_id``. Otherwise the index is updated from the stored commit
    (or ``ZERO_OBJ_ID`` when none is stored) to ``commit_id``, bracketed by
    ``begin_update_repo`` / ``finish_update_repo`` on the status index.

    Any exception is logged and swallowed; nothing is returned.
    """
    # NOTE(review): block nesting was reconstructed from a flattened source;
    # the re-base and the sleep are assumed to belong to the recovery branch.
    try:
        target_commit = commit_id
        index_name = WIKI_INDEX_PREFIX + wiki_id

        wiki_index.create_index_if_missing(index_name)

        wiki_status = wiki_status_index.get_repo_status_by_id(wiki_id)
        from_commit = wiki_status.from_commit
        to_commit = wiki_status.to_commit

        # Stored commit already matches the requested one: nothing to do.
        if target_commit == from_commit:
            return

        # Start from the stored commit, or from the zero object id when
        # no commit has been recorded yet.
        base_commit = from_commit or ZERO_OBJ_ID

        if wiki_status.need_recovery():
            # NOTE(review): the reviewer asked whether recovery still works
            # correctly with the current logic -- to be confirmed.
            logger.warning('%s: wiki index inrecovery', wiki_id)
            wiki_index.update(index_name, wiki_id, base_commit, to_commit)
            # Continue the regular update from where the recovery ended.
            base_commit = to_commit
            time.sleep(1)

        wiki_status_index.begin_update_repo(wiki_id, base_commit, target_commit)
        wiki_index.update(index_name, wiki_id, base_commit, target_commit)
        wiki_status_index.finish_update_repo(wiki_id, target_commit)

        logger.info('wiki: %s, update wiki index success', wiki_id)

    except Exception as e:
        logger.exception('wiki_id: %s, update wiki index error: %s.', wiki_id, e)
2 changes: 2 additions & 0 deletions seasearch/index_store/repo_file_name_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ def search_files(self, repos, keyword, start=0, size=10, suffixes=None, search_p
bulk_search_params.append(data)
search_path = None


logger.debug('search in repo_filename_index params: %s', json.dumps(bulk_search_params))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个去掉

results = self.seasearch_api.m_search(bulk_search_params)
files = []

Expand Down
4 changes: 2 additions & 2 deletions seasearch/index_store/repo_status_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ def check_repo_status(self, repo_id):
return self.seasearch_api.check_document_by_id(self.index_name, repo_id).get('is_exist')

def add_repo_status(self, repo_id, commit_id, updatingto, metadata_updated_time):
date = {
data = {
'repo_id': repo_id,
'commit_id': commit_id,
'updatingto': updatingto,
'metadata_updated_time': metadata_updated_time,
}
doc_id = repo_id
self.seasearch_api.create_document_by_id(self.index_name, doc_id, date)
self.seasearch_api.create_document_by_id(self.index_name, doc_id, data)

def begin_update_repo(self, repo_id, old_commit_id, new_commit_id, metadata_updated_time):
self.add_repo_status(repo_id, old_commit_id, new_commit_id, metadata_updated_time)
Expand Down
Loading
Loading