Browse Source

scraper,renderer: Support new API

master
Jakub Valenta 5 months ago
parent
commit
d24c46ef0f
  1. 70
      novinky_polls/renderer.py
  2. 44
      novinky_polls/scraper.py
  3. 37
      novinky_polls/tests/test_data/6314d7716ce377c94e5d9feb.json
  4. 48
      novinky_polls/tests/test_rendered.py

70
novinky_polls/renderer.py

@ -56,40 +56,46 @@ def convert_poll_to_template_context(
c_poll['date'] = format_date(date, 'd. MMMM yyyy', locale=_locale)
if not data:
c_poll['notEmpty'] = False
else:
return c_poll
if 'inquiry' in data:
# Until 2022-08-23
raw_poll_json = data['inquiry'][0]
question = (
raw_poll_json['question']
if 'question' in raw_poll_json
else raw_poll_json['title']
else:
# Since 2022-08-23
raw_poll_json = data
if 'question' in raw_poll_json:
# Since 2019-09
question = raw_poll_json['question']
else:
# Until 2019-09
question = raw_poll_json['title']
c_poll['title'] = fix_line_breaks(question, char_nbsp)
c_poll['answers'] = []
answers = [
Answer(
text=(x['answer'] if 'answer' in x else x['text']),
votes=(x['votes'] if 'votes' in x else x['voteCount']),
)
for x in raw_poll_json['answers']
]
sum_votes = sum(answer.votes for answer in answers)
i = 0
for answer in answers:
answer_perc = answer.votes / sum_votes if sum_votes else 0
c_poll['answers'].append(
{
'text': fix_line_breaks(answer.text, char_nbsp),
'pc': round(answer_perc * 100, 1),
'pc_formatted': format_decimal(
answer_perc * 100, '#0.#', locale=_locale
),
'width': round(answer_perc, 2),
'even': i,
}
)
c_poll['title'] = fix_line_breaks(question, char_nbsp)
c_poll['answers'] = []
answers = [
Answer(
text=(x['answer'] if 'answer' in x else x['text']),
votes=(x['votes'] if 'votes' in x else x['voteCount']),
)
for x in raw_poll_json['answers']
]
sum_votes = sum(answer.votes for answer in answers)
i = 0
for answer in answers:
answer_perc = answer.votes / sum_votes if sum_votes else 0
c_poll['answers'].append(
{
'text': fix_line_breaks(answer.text, char_nbsp),
'pc': round(answer_perc * 100, 1),
'pc_formatted': format_decimal(
answer_perc * 100, '#0.#', locale=_locale
),
'width': round(answer_perc, 2),
'even': i,
}
)
i = i ^ 1
c_poll['sumCount'] = sum_votes
c_poll['notEmpty'] = True
i = i ^ 1
c_poll['sumCount'] = sum_votes
c_poll['notEmpty'] = True
return c_poll

44
novinky_polls/scraper.py

@ -24,7 +24,6 @@ HEADERS = {
'Cache-Control': 'no-cache',
'Referer': 'https://www.novinky.cz/',
}
URL = 'https://www.novinky.cz/inquiry/screen?inquiryIds={}'
class InvalidJSONError(Exception):
@ -93,23 +92,28 @@ def _parse_component(component: dict) -> Optional[dict]:
return None
def _parse_poll_from_raw_json(poll_json_raw: dict) -> Poll:
poll_id = poll_json_raw['_importId']
poll_json_str = json.dumps({'inquiry': [poll_json_raw]})
return Poll(poll_id=poll_id, json_str=poll_json_str)
def parse_poll_from_ima_cache(ima_cache: dict) -> Poll:
for url, data in ima_cache.items():
if not url.startswith('http.get:https://www.novinky.cz/api/layouts/'):
continue
for components in data['value']['body'].values():
if not isinstance(components, list):
continue
for component in components:
poll_json_raw = _parse_component(component)
if poll_json_raw is not None:
return _parse_poll_from_raw_json(poll_json_raw)
if url.startswith('http.get:https://api-web.novinky.cz/v1/polls'):
# Since 2022-08-23
for component in data['value']['body']:
if component['_cls'] == 'Poll':
return Poll(
poll_id=component['_id'],
json_str=json.dumps(component),
)
elif url.startswith('http.get:https://www.novinky.cz/api/layouts/'):
# Until 2022-08-23
for components in data['value']['body'].values():
if not isinstance(components, list):
continue
for component in components:
poll_json_raw = _parse_component(component)
if poll_json_raw is not None:
return Poll(
poll_id=poll_json_raw['_importId'],
json_str=json.dumps({'inquiry': [poll_json_raw]}),
)
raise ParserError('Failed to find poll in IMA.Cache')
@ -137,7 +141,13 @@ def download_poll_by_id(
return cache[poll_id]
logger.info('Downloading poll %s', poll_id)
r = requests.get(URL.format(poll_id), headers=HEADERS)
if len(poll_id) > 10:
# Since 2022-08-23
url = f'https://api-web.novinky.cz/v1/polls/{poll_id}'
else:
# Until 2022-08-23
url = f'https://www.novinky.cz/inquiry/screen?inquiryIds={poll_id}'
r = requests.get(url, headers=HEADERS)
r.raise_for_status()
json_str = r.text

37
novinky_polls/tests/test_data/6314d7716ce377c94e5d9feb.json

@ -0,0 +1,37 @@
{
"_cls": "Poll",
"_created": "2022-09-04 18:50:57",
"_id": "6314d7716ce377c94e5d9feb",
"_updated": "2022-09-04 18:50:57",
"answers": [
{
"_cls": "cns_module_poll.model._PollAnswer",
"answer": "Ano",
"id": "6314d7716ce377c94e5d9fec",
"votes": 9713
},
{
"_cls": "cns_module_poll.model._PollAnswer",
"answer": "M\u011blo by pom\u00e1hat v\u00edc",
"id": "6314d7716ce377c94e5d9fed",
"votes": 4099
},
{
"_cls": "cns_module_poll.model._PollAnswer",
"answer": "M\u011blo by pom\u00e1hat m\u00ed\u0148",
"id": "6314d7716ce377c94e5d9fee",
"votes": 17419
}
],
"question": "Pom\u00e1h\u00e1 podle v\u00e1s \u010cesko Ukrajin\u011b v r\u00e1mci sv\u00fdch mo\u017enost\u00ed dostate\u010dn\u011b?",
"uid": 19527,
"_meta": {
"count": 1,
"limit": 18,
"nextItems": false,
"offset": 0,
"previousItems": false,
"previousItemExists": false,
"nextItemExists": false
}
}

48
novinky_polls/tests/test_rendered.py

@ -12,7 +12,7 @@ test_data_dir = Path(__file__).parent / 'test_data'
class TestRendered(TestCase):
maxDiff = None
def test_convert_poll_to_template_context_old(self):
def test_convert_poll_to_template_context_before_2019_09(self):
with (test_data_dir / '18832.json').open() as f:
poll_json = json.load(f)
poll = (None, None, poll_json)
@ -24,14 +24,14 @@ class TestRendered(TestCase):
{
'pc': 31.7,
'text': 'Ano',
'pcFormatted': '31.7%',
'pc_formatted': '31.7',
'width': 0.32,
'even': 0,
},
{
'pc': 68.3,
'text': 'Ne',
'pcFormatted': '68.3%',
'pc_formatted': '68.3',
'width': 0.68,
'even': 1,
},
@ -41,7 +41,7 @@ class TestRendered(TestCase):
}
self.assertDictEqual(result, expected)
def test_convert_poll_to_template_context(self):
def test_convert_poll_to_template_context_before_2022_08_23(self):
with (test_data_dir / '19023.json').open() as f:
poll_json = json.load(f)
poll = (None, None, poll_json)
@ -53,14 +53,14 @@ class TestRendered(TestCase):
{
'pc': 62.8,
'text': 'Ano',
'pcFormatted': '62.8%',
'pc_formatted': '62.8',
'width': 0.63,
'even': 0,
},
{
'pc': 37.2,
'text': 'Ne',
'pcFormatted': '37.2%',
'pc_formatted': '37.2',
'width': 0.37,
'even': 1,
},
@ -69,3 +69,39 @@ class TestRendered(TestCase):
'notEmpty': True,
}
self.assertDictEqual(result, expected)
def test_convert_poll_to_template_context(self):
with (test_data_dir / '6314d7716ce377c94e5d9feb.json').open() as f:
poll_json = json.load(f)
poll = (None, None, poll_json)
result = convert_poll_to_template_context(poll)
expected = {
'date': '',
'title': 'Pomáhá podle vás Česko Ukrajině v rámci svých možností dostatečně?', # noqa
'answers': [
{
'pc': 31.1,
'text': 'Ano',
'pc_formatted': '31.1',
'width': 0.31,
'even': 0,
},
{
'pc': 13.1,
'text': 'Mělo by pomáhat víc',
'pc_formatted': '13.1',
'width': 0.13,
'even': 1,
},
{
'pc': 55.8,
'text': 'Mělo by pomáhat míň',
'pc_formatted': '55.8',
'width': 0.56,
'even': 0,
},
],
'sumCount': 31231,
'notEmpty': True,
}
self.assertDictEqual(result, expected)

Loading…
Cancel
Save