Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,26 @@ pip install git+git://github.com/lzjun567/zhihu-api --upgrade
取消关注专栏成功
```

**搜索问题**
```
from zhihu import Search
search = Search()
search.search_question('python', 20)
搜索问题“python”的前20个结果 (问题id和问题标题)
```

**搜索用户**
```
search.search_people('python', 20)
搜索用户“python”的前20个结果 (用户id和用户名)
```

**搜索话题**
```
search.search_topic('python', 20)
搜索话题“python”的前20个结果 (话题id和话题名称)
```

每个接口都提供了不只一种方式调用,更多参考单元测试里面的例子


Expand Down
31 changes: 31 additions & 0 deletions test/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# encoding: utf-8

from zhihu import Search
import unittest
import time


class SearchTestCase(unittest.TestCase):
    """Integration tests for zhihu.Search.

    NOTE(review): these hit the live Zhihu site and assume a logged-in
    session — they are integration tests, not unit tests.
    """

    def setUp(self):
        self.search = Search()

    def _assert_results(self, ids, labels, expected_count):
        """Shared checks for every search type.

        :param ids: list of result ids returned by the search
        :param labels: parallel list of titles/names/topics
        :param expected_count: how many results were requested
        """
        self.assertEqual(len(ids), len(labels),
                         msg="The numbers of IDs and labels we get should be the same.")
        self.assertEqual(len(ids), expected_count,
                         msg="We should get %d IDs." % expected_count)
        self.assertEqual(len(ids), len(set(ids)),
                         msg="All the IDs should be unique.")

    def test_search_question(self):
        time.sleep(1)  # throttle so consecutive tests don't hammer the server
        ids, titles = self.search.search_question('python', 20)
        self._assert_results(ids, titles, 20)

    def test_search_people(self):
        time.sleep(1)  # throttle so consecutive tests don't hammer the server
        ids, names = self.search.search_people('python', 20)
        self._assert_results(ids, names, 20)

    def test_search_topic(self):
        time.sleep(1)  # throttle so consecutive tests don't hammer the server
        ids, topics = self.search.search_topic('python', 20)
        self._assert_results(ids, topics, 20)
2 changes: 2 additions & 0 deletions zhihu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from zhihu.models import question
from zhihu.models import column
from zhihu.models import account
from zhihu.models import search

__version__ = '0.0.1'
__author__ = 'liuzhijun'
Expand All @@ -15,3 +16,4 @@
Question = question.Question
Column = column.Column
Account = account.Account
Search = search.Search
126 changes: 126 additions & 0 deletions zhihu/models/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# encoding: utf-8

from bs4 import BeautifulSoup

from zhihu.auth import need_login
from zhihu.error import ZhihuError
from zhihu.models import Model
from zhihu.url import URL


class Search(Model):
    """Search Zhihu for questions, people, or topics.

    All three public methods share one pagination scheme: the first page
    is the HTML search page, subsequent pages come from the "load more"
    JSON endpoint, 10 results at a time.
    """

    # Server-side page size for both the HTML page and the JSON endpoint.
    _PAGE_SIZE = 10

    @need_login
    def search_question(self, key_words='', number_of_results=10):
        """Search questions.

        :param key_words: search keyword(s)
        :param number_of_results: how many results to collect
        :return: (ids, titles) question ids and question titles

        >>> search = Search()
        >>> search.search_question('python', 20)
        """
        return self._search('content', key_words, number_of_results,
                            self._extract_questions)

    @need_login
    def search_people(self, key_words='', number_of_results=10):
        """Search people.

        :param key_words: search keyword(s)
        :param number_of_results: how many results to collect
        :return: (ids, names) user ids and user names

        >>> search = Search()
        >>> search.search_people('python', 20)
        """
        return self._search('people', key_words, number_of_results,
                            self._extract_people)

    @need_login
    def search_topic(self, key_words='', number_of_results=10):
        """Search topics.

        :param key_words: search keyword(s)
        :param number_of_results: how many results to collect
        :return: (ids, topics) topic ids and topic names

        >>> search = Search()
        >>> search.search_topic('python', 20)
        """
        return self._search('topic', key_words, number_of_results,
                            self._extract_topics)

    def _search(self, search_type, key_words, number_of_results, extract):
        """Shared pagination loop for all search types.

        :param search_type: server-side type ('content', 'people' or 'topic')
        :param key_words: search keyword(s)
        :param number_of_results: stop once this many results are collected
        :param extract: callable(html, get_more=False) -> (ids, labels)
        :return: (ids, labels), each truncated to number_of_results
        """
        html = self._get_search_response(search_type=search_type,
                                         key_words=key_words)
        ids, labels = extract(html)
        offset = self._PAGE_SIZE
        while len(labels) < number_of_results:
            html = self._get_search_response(search_type=search_type,
                                             key_words=key_words,
                                             get_more=True, offset=offset)
            if not html:
                # No more pages; return whatever was collected so far.
                break
            new_ids, new_labels = extract(html, get_more=True)
            ids += new_ids
            labels += new_labels
            offset += self._PAGE_SIZE
        return ids[:number_of_results], labels[:number_of_results]

    def _get_search_response(self, search_type='content', key_words='',
                             get_more=False, offset=0):
        """Fetch one page of raw search-result HTML.

        The first page is served as plain HTML; "load more" pages come
        back as JSON whose 'htmls' field is a list of HTML fragments,
        joined here into one string so both forms parse the same way.
        """
        if get_more:
            params = {
                'type': search_type,
                'q': key_words,
                'correction': 0,
                'offset': offset
            }
            response = self._session.get(URL.search(get_more=True), params=params)
            return "".join(response.json()['htmls'])
        else:
            response = self._session.get(URL.search(),
                                         params={'type': search_type, 'q': key_words})
            return response.text

    @staticmethod
    def _extract_questions(html, get_more=False):
        """Extract (ids, titles) of questions from a result page.

        Results whose link is not a question (e.g. answers mixed into the
        'content' search) are skipped.
        """
        soup = BeautifulSoup(html, 'html.parser')
        # "Load more" JSON fragments put the <li> items at top level;
        # the full HTML page wraps them in a <ul>.
        if get_more:
            parent = soup
        else:
            parent = soup.find('ul', {'class': ['list', 'contents', 'navigable']})
        ids, titles = [], []
        for li in parent.findChildren(recursive=False):
            link = li.div.a
            if "question" in link['href']:
                ids.append(link['href'][len('/question/'):])
                titles.append(link.getText())
        return ids, titles

    @staticmethod
    def _extract_people(html, get_more=False):
        """Extract (ids, names) of users from a result page."""
        soup = BeautifulSoup(html, 'html.parser')
        if get_more:
            parent = soup
        else:
            parent = soup.find('ul', {'class': ['list', 'users']})
        ids, names = [], []
        for li in parent.findChildren(recursive=False):
            link = li.div.div.div.a
            ids.append(link['href'][len('/people/'):])
            names.append(link.getText())
        return ids, names

    @staticmethod
    def _extract_topics(html, get_more=False):
        """Extract (ids, names) of topics from a result page."""
        soup = BeautifulSoup(html, 'html.parser')
        if get_more:
            parent = soup
        else:
            parent = soup.find('ul', {'class': ['list', 'topics']})
        ids, names = [], []
        for li in parent.findChildren(recursive=False):
            link = li.div.div.a
            ids.append(link['href'][len('/topic/'):])
            names.append(link.getText())
        return ids, names
5 changes: 5 additions & 0 deletions zhihu/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,8 @@ def register_validate():
@staticmethod
def register():
    # Phone-number registration endpoint.
    return URL.host + "/register/phone_num"

# Search for content / people / topics.
@staticmethod
def search(get_more=False):
    """Return the search URL.

    :param get_more: False for the first (HTML) page at /search,
                     True for the paginated "load more" JSON endpoint
                     at /r/search.
    """
    if get_more:
        return URL.host + "/r/search"
    return URL.host + "/search"