commit
8899fea093
27 changed files with 1447 additions and 0 deletions
@ -0,0 +1,5 @@ |
|||
__pycache__ |
|||
build |
|||
dist |
|||
*.DS_Store |
|||
*.egg-info |
@ -0,0 +1,202 @@ |
|||
|
|||
Apache License |
|||
Version 2.0, January 2004 |
|||
http://www.apache.org/licenses/ |
|||
|
|||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
|||
|
|||
1. Definitions. |
|||
|
|||
"License" shall mean the terms and conditions for use, reproduction, |
|||
and distribution as defined by Sections 1 through 9 of this document. |
|||
|
|||
"Licensor" shall mean the copyright owner or entity authorized by |
|||
the copyright owner that is granting the License. |
|||
|
|||
"Legal Entity" shall mean the union of the acting entity and all |
|||
other entities that control, are controlled by, or are under common |
|||
control with that entity. For the purposes of this definition, |
|||
"control" means (i) the power, direct or indirect, to cause the |
|||
direction or management of such entity, whether by contract or |
|||
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
|||
outstanding shares, or (iii) beneficial ownership of such entity. |
|||
|
|||
"You" (or "Your") shall mean an individual or Legal Entity |
|||
exercising permissions granted by this License. |
|||
|
|||
"Source" form shall mean the preferred form for making modifications, |
|||
including but not limited to software source code, documentation |
|||
source, and configuration files. |
|||
|
|||
"Object" form shall mean any form resulting from mechanical |
|||
transformation or translation of a Source form, including but |
|||
not limited to compiled object code, generated documentation, |
|||
and conversions to other media types. |
|||
|
|||
"Work" shall mean the work of authorship, whether in Source or |
|||
Object form, made available under the License, as indicated by a |
|||
copyright notice that is included in or attached to the work |
|||
(an example is provided in the Appendix below). |
|||
|
|||
"Derivative Works" shall mean any work, whether in Source or Object |
|||
form, that is based on (or derived from) the Work and for which the |
|||
editorial revisions, annotations, elaborations, or other modifications |
|||
represent, as a whole, an original work of authorship. For the purposes |
|||
of this License, Derivative Works shall not include works that remain |
|||
separable from, or merely link (or bind by name) to the interfaces of, |
|||
the Work and Derivative Works thereof. |
|||
|
|||
"Contribution" shall mean any work of authorship, including |
|||
the original version of the Work and any modifications or additions |
|||
to that Work or Derivative Works thereof, that is intentionally |
|||
submitted to Licensor for inclusion in the Work by the copyright owner |
|||
or by an individual or Legal Entity authorized to submit on behalf of |
|||
the copyright owner. For the purposes of this definition, "submitted" |
|||
means any form of electronic, verbal, or written communication sent |
|||
to the Licensor or its representatives, including but not limited to |
|||
communication on electronic mailing lists, source code control systems, |
|||
and issue tracking systems that are managed by, or on behalf of, the |
|||
Licensor for the purpose of discussing and improving the Work, but |
|||
excluding communication that is conspicuously marked or otherwise |
|||
designated in writing by the copyright owner as "Not a Contribution." |
|||
|
|||
"Contributor" shall mean Licensor and any individual or Legal Entity |
|||
on behalf of whom a Contribution has been received by Licensor and |
|||
subsequently incorporated within the Work. |
|||
|
|||
2. Grant of Copyright License. Subject to the terms and conditions of |
|||
this License, each Contributor hereby grants to You a perpetual, |
|||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|||
copyright license to reproduce, prepare Derivative Works of, |
|||
publicly display, publicly perform, sublicense, and distribute the |
|||
Work and such Derivative Works in Source or Object form. |
|||
|
|||
3. Grant of Patent License. Subject to the terms and conditions of |
|||
this License, each Contributor hereby grants to You a perpetual, |
|||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
|||
(except as stated in this section) patent license to make, have made, |
|||
use, offer to sell, sell, import, and otherwise transfer the Work, |
|||
where such license applies only to those patent claims licensable |
|||
by such Contributor that are necessarily infringed by their |
|||
Contribution(s) alone or by combination of their Contribution(s) |
|||
with the Work to which such Contribution(s) was submitted. If You |
|||
institute patent litigation against any entity (including a |
|||
cross-claim or counterclaim in a lawsuit) alleging that the Work |
|||
or a Contribution incorporated within the Work constitutes direct |
|||
or contributory patent infringement, then any patent licenses |
|||
granted to You under this License for that Work shall terminate |
|||
as of the date such litigation is filed. |
|||
|
|||
4. Redistribution. You may reproduce and distribute copies of the |
|||
Work or Derivative Works thereof in any medium, with or without |
|||
modifications, and in Source or Object form, provided that You |
|||
meet the following conditions: |
|||
|
|||
(a) You must give any other recipients of the Work or |
|||
Derivative Works a copy of this License; and |
|||
|
|||
(b) You must cause any modified files to carry prominent notices |
|||
stating that You changed the files; and |
|||
|
|||
(c) You must retain, in the Source form of any Derivative Works |
|||
that You distribute, all copyright, patent, trademark, and |
|||
attribution notices from the Source form of the Work, |
|||
excluding those notices that do not pertain to any part of |
|||
the Derivative Works; and |
|||
|
|||
(d) If the Work includes a "NOTICE" text file as part of its |
|||
distribution, then any Derivative Works that You distribute must |
|||
include a readable copy of the attribution notices contained |
|||
within such NOTICE file, excluding those notices that do not |
|||
pertain to any part of the Derivative Works, in at least one |
|||
of the following places: within a NOTICE text file distributed |
|||
as part of the Derivative Works; within the Source form or |
|||
documentation, if provided along with the Derivative Works; or, |
|||
within a display generated by the Derivative Works, if and |
|||
wherever such third-party notices normally appear. The contents |
|||
of the NOTICE file are for informational purposes only and |
|||
do not modify the License. You may add Your own attribution |
|||
notices within Derivative Works that You distribute, alongside |
|||
or as an addendum to the NOTICE text from the Work, provided |
|||
that such additional attribution notices cannot be construed |
|||
as modifying the License. |
|||
|
|||
You may add Your own copyright statement to Your modifications and |
|||
may provide additional or different license terms and conditions |
|||
for use, reproduction, or distribution of Your modifications, or |
|||
for any such Derivative Works as a whole, provided Your use, |
|||
reproduction, and distribution of the Work otherwise complies with |
|||
the conditions stated in this License. |
|||
|
|||
5. Submission of Contributions. Unless You explicitly state otherwise, |
|||
any Contribution intentionally submitted for inclusion in the Work |
|||
by You to the Licensor shall be under the terms and conditions of |
|||
this License, without any additional terms or conditions. |
|||
Notwithstanding the above, nothing herein shall supersede or modify |
|||
the terms of any separate license agreement you may have executed |
|||
with Licensor regarding such Contributions. |
|||
|
|||
6. Trademarks. This License does not grant permission to use the trade |
|||
names, trademarks, service marks, or product names of the Licensor, |
|||
except as required for reasonable and customary use in describing the |
|||
origin of the Work and reproducing the content of the NOTICE file. |
|||
|
|||
7. Disclaimer of Warranty. Unless required by applicable law or |
|||
agreed to in writing, Licensor provides the Work (and each |
|||
Contributor provides its Contributions) on an "AS IS" BASIS, |
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
|||
implied, including, without limitation, any warranties or conditions |
|||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
|||
PARTICULAR PURPOSE. You are solely responsible for determining the |
|||
appropriateness of using or redistributing the Work and assume any |
|||
risks associated with Your exercise of permissions under this License. |
|||
|
|||
8. Limitation of Liability. In no event and under no legal theory, |
|||
whether in tort (including negligence), contract, or otherwise, |
|||
unless required by applicable law (such as deliberate and grossly |
|||
negligent acts) or agreed to in writing, shall any Contributor be |
|||
liable to You for damages, including any direct, indirect, special, |
|||
incidental, or consequential damages of any character arising as a |
|||
result of this License or out of the use or inability to use the |
|||
Work (including but not limited to damages for loss of goodwill, |
|||
work stoppage, computer failure or malfunction, or any and all |
|||
other commercial damages or losses), even if such Contributor |
|||
has been advised of the possibility of such damages. |
|||
|
|||
9. Accepting Warranty or Additional Liability. While redistributing |
|||
the Work or Derivative Works thereof, You may choose to offer, |
|||
and charge a fee for, acceptance of support, warranty, indemnity, |
|||
or other liability obligations and/or rights consistent with this |
|||
License. However, in accepting such obligations, You may act only |
|||
on Your own behalf and on Your sole responsibility, not on behalf |
|||
of any other Contributor, and only if You agree to indemnify, |
|||
defend, and hold each Contributor harmless for any liability |
|||
incurred by, or claims asserted against, such Contributor by reason |
|||
of your accepting any such warranty or additional liability. |
|||
|
|||
END OF TERMS AND CONDITIONS |
|||
|
|||
APPENDIX: How to apply the Apache License to your work. |
|||
|
|||
To apply the Apache License to your work, attach the following |
|||
boilerplate notice, with the fields enclosed by brackets "[]" |
|||
replaced with your own identifying information. (Don't include |
|||
the brackets!) The text should be enclosed in the appropriate |
|||
comment syntax for the file format. We also recommend that a |
|||
file or class name and description of purpose be included on the |
|||
same "printed page" as the copyright notice for easier |
|||
identification within third-party archives. |
|||
|
|||
Copyright [yyyy] [name of copyright owner] |
|||
|
|||
Licensed under the Apache License, Version 2.0 (the "License"); |
|||
you may not use this file except in compliance with the License. |
|||
You may obtain a copy of the License at |
|||
|
|||
http://www.apache.org/licenses/LICENSE-2.0 |
|||
|
|||
Unless required by applicable law or agreed to in writing, software |
|||
distributed under the License is distributed on an "AS IS" BASIS, |
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
See the License for the specific language governing permissions and |
|||
limitations under the License. |
@ -0,0 +1,13 @@ |
|||
Copyright 2015 Jakub Valenta |
|||
|
|||
Licensed under the Apache License, Version 2.0 (the "License"); |
|||
you may not use this file except in compliance with the License. |
|||
You may obtain a copy of the License at |
|||
|
|||
http://www.apache.org/licenses/LICENSE-2.0 |
|||
|
|||
Unless required by applicable law or agreed to in writing, software |
|||
distributed under the License is distributed on an "AS IS" BASIS, |
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
See the License for the specific language governing permissions and |
|||
limitations under the License. |
@ -0,0 +1,120 @@ |
|||
# Novinky Polls |
|||
|
|||
Download and render (as HTML, TeX, or plain text) all Novinky.cz's polls for the year 2015. |
|||
|
|||
## Installation |
|||
|
|||
This software requires Python 3. See [Python's website](https://www.python.org/) for installation instructions. |
|||
|
|||
When you have Python 3 installed, install required packages with pip (Python's package management system): |
|||
|
|||
``` |
|||
pip install requests |
|||
pip install beautifulsoup4 |
|||
pip install pystache |
|||
``` |
|||
|
|||
Then you can call the executables: |
|||
|
|||
``` |
|||
./novinky-polls-add-archive-org -h |
|||
./novinky-polls-add-current -h |
|||
./novinky-polls-render-html -h |
|||
./novinky-polls-render-print -h |
|||
./novinky-polls-render-text -h |
|||
./novinky-polls-analyze -h |
|||
``` |
|||
|
|||
Or you can install this software as a Python package, which will also install all the dependencies and make the executables available globally: |
|||
|
|||
``` |
|||
python setup.py install |
|||
|
|||
novinky-polls-add-archive-org -h |
|||
novinky-polls-add-current -h |
|||
novinky-polls-render-html -h |
|||
novinky-polls-render-print -h |
|||
novinky-polls-render-text -h |
|||
novinky-polls-analyze -h |
|||
``` |
|||
|
|||
## Usage |
|||
|
|||
Each Novinky.cz poll is identified by a unique ID. The poll data itself (title, answers, percentages) is accessible through a public JSON API (for an example of an HTTP request that retrieves this data see [novinky_polls/test/curl_one_poll_json.sh](./novinky_polls/test/curl_one_poll_json.sh)). The poll data, however, does not include information on when (date and time) the poll was shown on Novinky.cz's homepage. Therefore we need to get this information elsewhere -- either from Archive.org or by checking the current Novinky.cz homepage and saving the current timestamp. |
|||
|
|||
This software works in two phases: |
|||
|
|||
### 1. Create a map file mapping poll IDs to timestamps |
|||
|
|||
Add poll IDs and timestamps archived by Archive.org to a map file: |
|||
|
|||
``` |
|||
novinky-polls-add-archive-org -i my_polls_map.txt |
|||
``` |
|||
|
|||
Add current poll ID with current timestamp to a map file: |
|||
|
|||
``` |
|||
novinky-polls-add-current -i my_polls_map.txt |
|||
``` |
|||
|
|||
The map file `my_polls_map.txt` now contains a map of poll IDs to timestamps: |
|||
|
|||
``` |
|||
20150101190335 13678 |
|||
20150101190338 13678 |
|||
20150102153411 None |
|||
20150102202406 13677 |
|||
20150102202408 13677 |
|||
... |
|||
``` |
|||
|
|||
Value `None` means that there was no poll at the time of the timestamp. |
|||
|
|||
### 2. Download and render the polls |
|||
|
|||
Once you have the map file, you can download the polls and render them in various formats. |
|||
|
|||
#### HTML |
|||
|
|||
``` |
|||
novinky-polls-render-html -c my_cache_dir -i my_polls_map.txt -o my_polls_export.html -l cs_CZ.utf8 |
|||
``` |
|||
|
|||
#### TeX |
|||
|
|||
``` |
|||
novinky-polls-render-print -c my_cache_dir -i my_polls_map.txt -o my_polls_export.tex -l cs_CZ.utf8 |
|||
``` |
|||
|
|||
The TeX file is meant to be exported to PDF using [lualatex](http://luatex.org/). |
|||
|
|||
#### Plain text |
|||
|
|||
``` |
|||
novinky-polls-render-text -c my_cache_dir -i my_polls_map.txt -o my_polls_export.txt -l cs_CZ.utf8 |
|||
``` |
|||
|
|||
### Categorize polls and print the statistics |
|||
|
|||
``` |
|||
novinky-polls-analyze -c my_cache_dir -i my_polls_map.txt |
|||
``` |
|||
|
|||
## Help |
|||
|
|||
Call any of the scripts mentioned in [Usage](#usage) with the parameter `-h` or `--help` to see full documentation. Example: |
|||
|
|||
``` |
|||
novinky-polls-add-current -h |
|||
``` |
|||
|
|||
## Contributing |
|||
|
|||
__Feel free to remix this piece of software.__ See [NOTICE](./NOTICE) and [LICENSE](./LICENSE) for license information. |
|||
|
|||
You might find these reusable modules useful: |
|||
|
|||
- `scraper.py` contains functions to download Novinky.cz's homepage and parse the ID of the current poll. It also contains functions to download a poll from Novinky.cz's public JSON API. |
|||
- `analyzer.py` contains functions to parse Novinky.cz's poll JSON data and to read and write the map of poll IDs and dates. |
|||
- `reader.py` puts `scraper.py` and `analyzer.py` together. Use its functions to retrieve the data of all polls mentioned in the poll map. |
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.add_archive_org.main()."""

import novinky_polls.add_archive_org

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.add_archive_org.main()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.add_current.main()."""

import novinky_polls.add_current

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.add_current.main()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.analyze.main()."""

import novinky_polls.analyze

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.analyze.main()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.reader.main_refresh_polls()."""

import novinky_polls.reader

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.reader.main_refresh_polls()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.render_html.main()."""

import novinky_polls.render_html

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.render_html.main()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.render_print.main()."""

import novinky_polls.render_print

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.render_print.main()
@ -0,0 +1,5 @@ |
|||
#!/usr/bin/env python3
"""Launcher script: run novinky_polls.render_text.main()."""

import novinky_polls.render_text

# The project requires Python 3, hence the explicit python3 shebang above.
if __name__ == '__main__':
    novinky_polls.render_text.main()
@ -0,0 +1,75 @@ |
|||
import datetime |
|||
import re |
|||
import sys |
|||
|
|||
import requests |
|||
from bs4 import BeautifulSoup |
|||
|
|||
from novinky_polls import analyzer |
|||
from novinky_polls import scraper |
|||
|
|||
|
|||
# Archive.org listing of Novinky.cz homepage captures; default for --url.
DEFAULT_ARCHIVE_ORG_URL = (
    'https://web.archive.org/web/20151101074635*/http://www.novinky.cz/'
)
# Default for --limit; -1 means "download all entries".
DEFAULT_LIMIT = -1
|||
|
|||
|
|||
def download_and_parse_archive_org_urls(url):
    """Return the capture links listed on an Archive.org page.

    Downloads ``url`` and collects the ``href`` of every ``<a>`` element
    nested inside an element with the CSS class ``pop``.

    Args:
        url: address of the Archive.org listing page to download.

    Returns:
        A list of ``href`` values in document order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    r = requests.get(url)
    # Fail loudly instead of silently parsing an error page into an empty list.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    urls = []
    for captures in soup.find_all(class_='pop'):
        # href=True skips anchors without an href attribute, which would
        # otherwise raise KeyError on link['href'].
        for link in captures.find_all('a', href=True):
            urls.append(link['href'])
    return urls
|||
|
|||
|
|||
def read_poll_map_from_archive_org(archive_org_url, limit):
    """Build a ``{capture datetime: poll ID}`` map from Archive.org captures.

    Every capture link found on ``archive_org_url`` is downloaded and its
    poll ID parsed. A capture is recorded when its poll ID is empty/None or
    when that poll ID has not been recorded yet, so each poll ID is stored
    once, under the first capture it was seen in.

    Args:
        archive_org_url: Archive.org listing page with the capture links.
        limit: maximum number of capture links to process; a negative value
            processes all of them, ``0`` processes none.

    Returns:
        A dict mapping ``datetime.datetime`` (parsed from the 14-digit
        timestamp in the capture link) to the poll ID.
    """
    urls = download_and_parse_archive_org_urls(archive_org_url)
    if limit >= 0:
        urls = urls[:limit]

    timestamp_re = re.compile(r'\d{14}')
    polls = {}
    for url in urls:
        match = timestamp_re.search(url)
        if not match:
            # Not a capture link (no YYYYmmddHHMMSS timestamp); skip it
            # rather than crash on match.group().
            print('ARCHIVE URL SKIPPED "{}"'.format(url))
            continue
        poll_id = scraper.download_page_and_parse_poll_id(
            'https://web.archive.org' + url
        )
        if not poll_id or poll_id not in polls.values():
            date = datetime.datetime.strptime(match.group(0), '%Y%m%d%H%M%S')
            polls[date] = poll_id
    return polls
|||
|
|||
|
|||
def add_archive_org(args):
    """Fetch the Archive.org poll map and merge it into the map file.

    Args:
        args: parsed command-line namespace providing ``url``, ``limit``,
            ``inputfile`` and ``outputfile``.
    """
    archived_polls = read_poll_map_from_archive_org(args.url, args.limit)
    analyzer.map_add(args.inputfile, archived_polls, args.outputfile)
|||
|
|||
|
|||
def main():
    """Command-line entry point: parse the options and run add_archive_org().

    Exits the process through sys.exit() with add_archive_org()'s result.
    """
    from argparse import ArgumentParser

    output_help = (
        'output map file path; optional, if not specified the map is written'
        ' back to the input file'
    )
    url_help = (
        "URL of Novinky.cz's archive at Archive.org; optional, if not"
        " specified {} is used"
    ).format(DEFAULT_ARCHIVE_ORG_URL)
    limit_help = (
        'maximum Archive.org entries to download, set to -1 to download all'
        ' entries; optional, if not specified {} is used'
    ).format(DEFAULT_LIMIT)

    cli = ArgumentParser(
        description=(
            'Novinky: Add poll IDs and timestamps archived by Archive.org'
            ' to a map file mapping poll IDs to timestamps.'
        ),
    )
    cli.add_argument(
        '--input', '-i', dest='inputfile', required=True,
        help='input map file path',
    )
    cli.add_argument('--output', '-o', dest='outputfile', help=output_help)
    cli.add_argument(
        '--url', '-u', dest='url', default=DEFAULT_ARCHIVE_ORG_URL,
        help=url_help,
    )
    cli.add_argument(
        '--limit', '-l', dest='limit', type=int, default=DEFAULT_LIMIT,
        help=limit_help,
    )
    options = cli.parse_args()

    sys.exit(add_archive_org(options))
@ -0,0 +1,36 @@ |
|||
import datetime |
|||
import sys |
|||
|
|||
from novinky_polls import analyzer |
|||
from novinky_polls import scraper |
|||
|
|||
# Novinky.cz homepage; used as the default value of the --url option.
DEFAULT_URL = 'http://www.novinky.cz/'
|||
|
|||
|
|||
def add_current(args):
    """Record the poll currently shown at ``args.url`` under the current time.

    Args:
        args: parsed command-line namespace providing ``url``, ``inputfile``
            and ``outputfile``.
    """
    poll_id = scraper.download_page_and_parse_poll_id(args.url)
    # The map file stores timestamps with one-second resolution
    # ('%Y%m%d%H%M%S'). Dropping the microseconds keeps the in-memory key
    # equal to what a later read of the file yields, so the duplicate check
    # in analyzer.map_add can actually match.
    now = datetime.datetime.today().replace(microsecond=0)
    analyzer.map_add(args.inputfile, {now: poll_id}, args.outputfile)
|||
|
|||
|
|||
def main():
    """Command-line entry point: parse the options and run add_current().

    Exits the process through sys.exit() with add_current()'s result.
    """
    from argparse import ArgumentParser

    output_help = (
        'output map file path; optional, if not specified the map is written'
        ' back to the input file'
    )
    url_help = (
        "URL of Novinky.cz's homepage; optional, if not specified {} is used"
    ).format(DEFAULT_URL)

    cli = ArgumentParser(
        description=(
            'Novinky: Add current poll ID with current timestamps to a map'
            ' file mapping poll IDs to timestamps.'
        ),
    )
    cli.add_argument(
        '--input', '-i', dest='inputfile', required=True,
        help='input map file path',
    )
    cli.add_argument('--output', '-o', dest='outputfile', help=output_help)
    cli.add_argument(
        '--url', '-u', dest='url', default=DEFAULT_URL, help=url_help,
    )
    options = cli.parse_args()

    sys.exit(add_current(options))
@ -0,0 +1,220 @@ |
|||
import re |
|||
import sys |
|||
|
|||
from novinky_polls import reader |
|||
|
|||
|
|||
# Two-level tagging rules: top-level group -> tag name -> regex patterns.
# Patterns are applied with re.search() to the lower-cased poll title
# (see assign_tags_to_title); a tag is assigned when any pattern matches.
TAG_RULES = {
    'česko': {
        'evropská unie': ('unie', 'unii', 'jednotné evropské armád'),
        'kauzy': (
            'babiš', 'konvoj', 'ameri.+raket', 'hav(el|la).+muze', 'kajín',
            'sudet', 'češk.+zachráněn', 'radě hlavního města', 'šincl',
            'chládek', 'kancléř', 'jmenov.+profes', 'vyznamen',
            'cest.+do.+moskv',
        ),
        'nové zákony': (
            'alergen', 'těžební', 'zveřejňovala smlouv',
            'elektronické eviden', 'odvodů do armád', 'výkupu kovů',
            'zákaz.+kouř', 'daně.+cigar', 'klokán', 'přebalov',
        ),
        'jazyk': ('příjmení',),
    },
    'obecné': {
        'počasí': ('počas', 'tepl'),
        'datum': (
            'předsevzetí', 'pátek (13|tři)', '17. listopad', '1968', '1989',
            'velký pátek',
        ),
    },
    'osobní': {
        'auta': (
            'aut(o|a|em)', 'řidič', 'škoda|r200', 'silni|vozovk',
            'náklad.+doprav',
        ),
        'osobní finance': (
            'finan', 'příj(em|mu|my)', 'důchod', 'eura', 'dolar', 'bank',
            'výplat', 'ceny', 'korun', '(ú|u)tra', 'půjč', 'pen(í|ě)z',
            'dan(ě|í)', 'dodavatel.+energi', 'pojiš', 'majet(ek|k)', 'dluh',
            'vyhr.+loteri', 'výživné',
        ),
        'jídlo': ('pije', 'nápoj'),
        'morálka': (
            'trest', 'přestup', 'chová', 'bezdomov', 'zvíř', 'hráli.+hazard',
            'babybox', 'majet.+bohat',
        ),
        'nakupování': ('n(a|á)kup', 'objedn', 'utrác', 'neděl.+prod'),
        'práce': (
            'spokojen', 'chatě', 'pracovní dob', 'dovolen', 'mzd', 'zaměst',
            'neschopen', 'chodit práce', 'pomocnici',
        ),
        'stáří': ('senio', 'stáří', 'důchod', 'penz'),
        'televize': ('televiz', 'stardance', 'miss'),
        'vztahy': ('sex', 'partner'),
        'zdraví': ('lékař', 'zubař', 'bodnutí', 'sport'),
    },
    'svět': {
        'blízký východ': ('zajat.+is',),
        'rusko': ('rusk', 'ukrajin'),
        'uprchlíci': (
            'uprchl', 'sýri', 'syrsk', 'migra', 'irá(c|k)',
            'zruš.+voln.+pohyb',
        ),
        'bulvár': ('william',),
        'katastrofy': ('vyšetřov',),
    },
}
|||
|
|||
|
|||
def assign_tags_to_title(title, tag_rules):
    """Return the tags whose patterns match ``title``.

    Args:
        title: poll title; it is lower-cased before matching.
        tag_rules: mapping whose values are either a nested mapping of the
            same shape (a group of tags) or an iterable of regular
            expressions; in the latter case the key is the tag name.

    Returns:
        A list of tag names in rule order, each tag at most once. A tag is
        included when at least one of its patterns is found in the title
        with ``re.search``.
    """
    lowered = title.lower()
    tags = []
    for name, rules in tag_rules.items():
        if isinstance(rules, dict):
            # Nested group: recurse with the original title.
            matched = assign_tags_to_title(title, rules)
        elif any(re.search(rule, lowered) for rule in rules):
            matched = [name]
        else:
            matched = []
        # De-duplicate here so the same tag name appearing in several
        # groups is reported only once.
        for tag in matched:
            if tag not in tags:
                tags.append(tag)
    return tags
|||
|
|||
|
|||
def analyze(args):
    """Tag every poll title and print a bar chart of tag frequencies.

    Polls without data are ignored. Titles that receive no tag are printed
    with a ``-------`` prefix. After all polls are processed, each tag is
    printed right-aligned to 30 columns followed by one block character per
    occurrence, most frequent tag first.

    Args:
        args: parsed command-line namespace providing ``inputfile`` and
            ``cachedir``.
    """
    counts = {}
    all_polls = reader.read_polls_sorted_no_dups(args.inputfile, args.cachedir)

    for _poll_id, _date, data in all_polls:
        if not data:
            continue
        title = data['inquiry'][0]['title']
        tags = assign_tags_to_title(title, TAG_RULES)
        for tag in tags:
            counts[tag] = counts.get(tag, 0) + 1
        if not tags:
            # Untagged title: show it so new rules can be written for it.
            print('-------', title)

    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for tag, count in ranked:
        print(tag.rjust(30), '\u2587' * count)
|||
|
|||
|
|||
def main():
    """Command-line entry point: parse the options and run analyze().

    Exits the process through sys.exit() with analyze()'s result.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser(
        description=(
            'Novinky: Analyze map file mapping poll IDs to timestamps --'
            ' categorize poll answers and print the stats'
        ),
    )
    cli.add_argument(
        '--input', '-i', dest='inputfile', required=True,
        help='input map file path',
    )
    cli.add_argument(
        '--cache', '-c', dest='cachedir', required=True,
        help='cache location',
    )
    options = cli.parse_args()

    sys.exit(analyze(options))
@ -0,0 +1,107 @@ |
|||
import calendar |
|||
import datetime |
|||
import json |
|||
import re |
|||
import os |
|||
|
|||
|
|||
def clean_json(json_str):
    """Strip block comments and normalise quotes so the text parses as JSON.

    Lines between a line that is exactly ``/*`` and a line that is exactly
    ``*/`` (both inclusive) are dropped. If the remaining text contains a
    single quote, every double quote is backslash-escaped and every single
    quote is then turned into a double quote.

    Args:
        json_str: raw text to clean.

    Returns:
        The cleaned text, with the kept lines joined by ``\\n``.
    """
    kept = []
    skipping = False
    for line in json_str.splitlines():
        if line == '*/':
            skipping = False
            continue
        if line == '/*':
            skipping = True
            continue
        if not skipping:
            kept.append(line)
    text = '\n'.join(kept)

    if '\'' in text:
        return text.replace('"', '\\"').replace('\'', '"')
    return text
|||
|
|||
|
|||
def load_json(json_str):
    """Parse poll JSON text, returning ``None`` when nothing usable results.

    Args:
        json_str: raw text (or ``None``); it is passed through clean_json()
            before parsing.

    Returns:
        The decoded data, or ``None`` when the input is ``None`` or blank,
        when decoding fails, or when the decoded value is falsy.
    """
    if json_str is None or not json_str.strip():
        return None
    try:
        parsed = json.loads(clean_json(json_str))
    except json.decoder.JSONDecodeError:
        return None
    return parsed if parsed else None
|||
|
|||
|
|||
def map_read(file_path):
    """Read a map file into a ``{datetime: poll ID}`` dict.

    Each line has the form ``YYYYmmddHHMMSS``, optionally followed by a
    space and either a numeric poll ID or the literal ``None`` (which is
    what map_write() emits for a ``None`` ID). Lines that do not match are
    reported on stdout and skipped.

    Args:
        file_path: path of the map file to read.

    Returns:
        A dict keyed by ``datetime.datetime``. The value is the poll ID as
        a string, ``''`` when the line carries no ID, or ``None`` when the
        line carries the literal ``None``.
    """
    polls = {}
    # Accept "None" as well as digits: otherwise the lines written for
    # None IDs are rejected and those entries are lost on every read.
    line_re = re.compile(r'^(\d{14})( (\d+|None))?$')
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            m = line_re.match(line)
            if not m:
                print('MAP READ ERROR "{}"'.format(line))
                continue
            date = datetime.datetime.strptime(m.group(1), '%Y%m%d%H%M%S')
            poll_id = m.group(3)
            if poll_id is None:
                poll_id = ''
            elif poll_id == 'None':
                poll_id = None
            polls[date] = poll_id
    return polls
|||
|
|||
|
|||
def map_write(file_path, polls):
    """Write the poll map to ``file_path``, one entry per line.

    Entries are written in ascending timestamp order as
    ``YYYYmmddHHMMSS <poll ID>``; the file is overwritten.

    Args:
        file_path: destination path.
        polls: dict mapping ``datetime.datetime`` to poll ID.
    """
    with open(file_path, 'w') as out:
        for when in sorted(polls):
            stamp = when.strftime('%Y%m%d%H%M%S')
            out.write('{} {}\n'.format(stamp, polls[when]))
|||
|
|||
|
|||
def map_add(input_file_path, polls_new, output_file_path=None):
    """Merge new entries into a map file without overwriting existing ones.

    Reads the map from ``input_file_path``, adds every entry of
    ``polls_new`` whose timestamp is not present yet (logging each step to
    stdout) and writes the result.

    Args:
        input_file_path: map file to read.
        polls_new: dict mapping ``datetime.datetime`` to poll ID.
        output_file_path: where to write the merged map; when ``None`` the
            input file is overwritten.
    """
    target = input_file_path if output_file_path is None else output_file_path
    merged = map_read(input_file_path)

    for date, poll_id in polls_new.items():
        print('MAP ADD {} {}'.format(poll_id, date))
        if date not in merged:
            merged[date] = poll_id
            continue
        print(' MAP EXISTS {} {}'.format(poll_id, date))

    map_write(target, merged)
|||
|
|||
|
|||
def sort_by_date(polls):
    """Index polls by calendar day.

    Args:
        polls: iterable of poll tuples whose second item is a
            ``datetime.datetime``.

    Returns:
        A dict mapping ``datetime.date`` to the poll tuple. When several
        polls fall on the same day the last one wins (TODO: overwriting).
    """
    return {poll[1].date(): poll for poll in polls}
|||
|
|||
|
|||
def fill_year(polls_by_date, year):
    """Return one poll entry for every calendar day of ``year``.

    Args:
        polls_by_date: dict mapping ``datetime.date`` to a poll tuple.
        year: the year to cover.

    Returns:
        A chronologically ordered list with exactly one item per day of
        the year: the poll from ``polls_by_date`` when there is one for
        that day, otherwise the placeholder ``(None, <midnight datetime of
        that day>, None)``.
    """
    polls = []
    cal = calendar.Calendar()
    for month in range(1, 13):
        for date in cal.itermonthdates(year, month):
            # itermonthdates() yields complete weeks, so it also returns
            # days of the neighbouring months (and years). Keeping only the
            # current month's days prevents boundary days from being
            # emitted twice and out of order.
            if date.year != year or date.month != month:
                continue
            if date in polls_by_date:
                polls.append(polls_by_date[date])
            else:
                dt = datetime.datetime(date.year, date.month, date.day)
                polls.append((None, dt, None))
    return polls
@ -0,0 +1,67 @@ |
|||
import datetime |
|||
import sys |
|||
|
|||
from novinky_polls import analyzer |
|||
from novinky_polls import scraper |
|||
|
|||
|
|||
def refresh_polls(path, cache_dir, date_from):
    """Force a re-download of all polls recorded at or after ``date_from``.

    Args:
        path: map file path.
        cache_dir: cache location handed to scraper.download_polls().
        date_from: ``datetime.datetime``; only map entries with a timestamp
            greater than or equal to it are refreshed.
    """
    recent = []
    for date, poll_id in analyzer.map_read(path).items():
        if date >= date_from:
            recent.append((poll_id, date))
    scraper.download_polls(recent, cache_dir, force=True)
|||
|
|||
|
|||
def read_polls(path, cache_dir):
    """Yield ``(id, date, data)`` for the polls listed in the map file.

    The map is read from ``path``, the polls are fetched through
    scraper.download_polls() and each JSON payload is decoded with
    analyzer.load_json().

    Entries with an empty or ``None`` id are always yielded (their data is
    whatever load_json() returned). Entries that do have an id but whose
    payload could not be turned into data are dropped.

    Args:
        path: map file path.
        cache_dir: cache location handed to scraper.download_polls().
    """
    id_date_pairs = [
        (poll_id, date)
        for date, poll_id in analyzer.map_read(path).items()
    ]
    downloaded = scraper.download_polls(id_date_pairs, cache_dir)
    for poll_id, date, json_str in downloaded:
        data = analyzer.load_json(json_str)
        if poll_id and not data:
            # A real poll id without usable data: exclude it.
            continue
        yield poll_id, date, data
|||
|
|||
|
|||
def sort_polls(polls):
    """Return the polls as a new list ordered by their second item (date).

    The sort is stable, so polls with equal dates keep their input order.
    """
    ordered = list(polls)
    ordered.sort(key=lambda poll: poll[1])
    return ordered
|||
|
|||
|
|||
def read_polls_sorted(path, cache_dir):
    """Return the polls from read_polls() as a list sorted by date."""
    unsorted_polls = read_polls(path, cache_dir)
    return sort_polls(unsorted_polls)
|||
|
|||
|
|||
def read_polls_sorted_no_dups(path, cache_dir):
    """Yield the date-sorted polls, keeping only the first poll of each day.

    A poll is skipped when it falls on the same calendar day as the poll
    yielded before it.
    """
    prev = None
    for poll in read_polls_sorted(path, cache_dir):
        if prev and poll[1].date() == prev[1].date():
            continue
        prev = poll
        yield poll
|||
|
|||
|
|||
def main_refresh_polls():
    """Command-line entry point: re-download the polls listed in a map file.

    ``--date`` restricts the refresh to polls recorded on or after the given
    day. When it is omitted every poll in the map is refreshed; previously
    an omitted ``--date`` crashed with a TypeError in strptime().
    """
    import argparse

    def parse_date(value):
        # Turn a malformed date into a regular argparse usage error.
        try:
            return datetime.datetime.strptime(value, '%Y%m%d')
        except ValueError:
            raise argparse.ArgumentTypeError(
                'invalid date "{}", expected format YYYYMMDD'.format(value)
            )

    parser = argparse.ArgumentParser(
        description='Novinky: Refresh polls.'
    )
    parser.add_argument('--input', '-i', dest='inputfile', required=True,
                        help='input map file path')
    parser.add_argument('--cache', '-c', dest='cachedir', required=True,
                        help='cache location')
    parser.add_argument('--date', '-d', dest='date_from', type=parse_date,
                        default=datetime.datetime.min,
                        help='refresh only polls recorded on or after this'
                        ' date, formatted as YYYYMMDD; optional, if not'
                        ' specified all polls are refreshed')
    args = parser.parse_args()

    refresh_polls(args.inputfile, args.cachedir, args.date_from)

    sys.exit()
@ -0,0 +1,70 @@ |
|||
import datetime |
|||
import locale |
|||
import os.path |
|||
import sys |
|||
|
|||
from novinky_polls import analyzer |
|||
from novinky_polls import reader |
|||
from novinky_polls import renderer |
|||
|
|||
|
|||
# Locale used for date formatting unless --locale is given.
DEFAULT_LOCALE = 'en_US.utf8'
# Mustache HTML template used unless --template is given; it is looked up
# in the ``templates`` directory next to this module.
DEFAULT_TEMPLATE = os.path.join(
    os.path.abspath(os.path.dirname(__file__)),
    'templates/document.html'
)
# Non-breaking space handed to the renderer. It is written as an explicit
# escape because a literal U+00A0 cannot be told apart from a plain space
# in the source and is easily lost when the file is edited or copied.
CHAR_NBSP = '\u00a0'
|||
|
|||
|
|||
def render_html(args):
    """Render the polls listed in the map file into an HTML file.

    Sets the process locale to ``args.locale``, reads the de-duplicated
    polls and, when ``args.year`` is given, pads them so that every day of
    that year has an entry. It then renders them with ``args.template`` and
    writes the result to the output path (optionally timestamped).

    Args:
        args: parsed command-line namespace providing ``locale``,
            ``inputfile``, ``cachedir``, ``year``, ``template``,
            ``outputfile`` and ``timestamp``.
    """
    locale.setlocale(locale.LC_ALL, args.locale)

    polls = reader.read_polls_sorted_no_dups(args.inputfile, args.cachedir)
    if args.year:
        by_date = analyzer.sort_by_date(polls)
        polls = analyzer.fill_year(by_date, args.year)

    document = renderer.render(polls, args.template, CHAR_NBSP)
    target = renderer.add_timestamp_to_path(args.outputfile, args.timestamp)

    print('EXPORT', target)
    with open(target, 'w') as handle:
        print(document, file=handle)
|||
|
|||
|
|||
def main():
    """Command-line entry point: parse the options and run render_html().

    Exits the process through sys.exit() with render_html()'s result.
    """
    from argparse import ArgumentParser

    template_help = (
        'Mustache HTML template file path; optional, if not specified {}'
        ' is used'
    ).format(DEFAULT_TEMPLATE)
    locale_help = (
        'locale for date formatting; optional, if not specified {} is used'
    ).format(DEFAULT_LOCALE)
    timestamp_help = (
        'append current timestamp to the output file name; optional'
    )
    year_help = (
        'render all dates of passed year, even dates for which a poll was'
        ' not found'
    )

    cli = ArgumentParser(
        description=(
            'Novinky: Render map file -- mapping poll IDs to timestamps --'
            ' as an HTML'
        ),
    )
    cli.add_argument(
        '--input', '-i', dest='inputfile', required=True,
        help='input map file path',
    )
    cli.add_argument(
        '--output', '-o', dest='outputfile', required=True,
        help='output HTML file path',
    )
    cli.add_argument(
        '--cache', '-c', dest='cachedir', required=True,
        help='cache location',
    )
    cli.add_argument(
        '--template', '-m', dest='template', default=DEFAULT_TEMPLATE,
        help=template_help,
    )
    cli.add_argument(
        '--locale', '-l', dest='locale', default=DEFAULT_LOCALE,
        help=locale_help,
    )
    cli.add_argument(
        '--timestamp', '-t', dest='timestamp', action='store_true',
        help=timestamp_help,
    )
    cli.add_argument('--year', '-y', dest='year', type=int, help=year_help)
    options = cli.parse_args()

    sys.exit(render_html(options))
@ -0,0 +1,58 @@ |
|||
import locale |
|||
import os.path |
|||
import sys |
|||
|
|||
from novinky_polls import reader |
|||
from novinky_polls import renderer |
|||
|
|||
|
|||
# Locale passed to locale.setlocale() unless --locale is given.
DEFAULT_LOCALE = 'en_US.utf8'
# TeX template located next to this module, used unless --template is given.
DEFAULT_TEMPLATE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'templates/document.tex'
)
# TeX tie: the character inserted where a line must not break.
CHAR_NBSP = '~'
|||
|
|||
|
|||
def render_print(args):
    """Render the polls of a map file through the TeX template.

    Reads ``locale``, ``inputfile``, ``cachedir``, ``template``,
    ``outputfile`` and ``timestamp`` from ``args`` and writes the rendered
    document to the (optionally timestamped) output path. Returns ``None``.
    """
    locale.setlocale(locale.LC_ALL, args.locale)

    document = renderer.render(
        reader.read_polls_sorted_no_dups(args.inputfile, args.cachedir),
        args.template,
        CHAR_NBSP
    )

    target = renderer.add_timestamp_to_path(args.outputfile, args.timestamp)
    print('EXPORT', target)
    with open(target, 'w') as output:
        print(document, file=output)
|||
|
|||
|
|||
def main():
    """Command-line entry point of the TeX renderer.

    Declares the command-line options, parses ``sys.argv`` and exits the
    process with the value returned by ``render_print``.
    """
    import argparse

    template_help = (
        'Mustache TeX template file path; optional,'
        ' if not specified {} is used'
    ).format(DEFAULT_TEMPLATE)
    locale_help = (
        'locale for date formatting; optional, if not'
        ' specified {} is used'
    ).format(DEFAULT_LOCALE)

    parser = argparse.ArgumentParser(
        description=(
            'Novinky: Render map file -- mapping poll IDs to'
            ' timestamps -- as TeX'
        )
    )
    add = parser.add_argument
    add('--input', '-i', dest='inputfile', required=True,
        help='input map file path')
    add('--output', '-o', dest='outputfile', required=True,
        help='output TeX file path')
    add('--cache', '-c', dest='cachedir', required=True,
        help='cache location')
    add('--template', '-m', dest='template', default=DEFAULT_TEMPLATE,
        help=template_help)
    add('--locale', '-l', dest='locale', default=DEFAULT_LOCALE,
        help=locale_help)
    add('--timestamp', '-t', dest='timestamp', action='store_true',
        help='append current timestamp to the output file name; optional')

    exit_status = render_print(parser.parse_args())
    sys.exit(exit_status)
@ -0,0 +1,48 @@ |
|||
import sys |
|||
|
|||
from novinky_polls import reader |
|||
from novinky_polls import renderer |
|||
|
|||
|
|||
def render_poll(id, date, data):
    """Return the plain-text lines of a single poll.

    ``id`` and ``date`` are not used by this function. When ``data`` is
    falsy an empty list is returned; otherwise the list holds the title of
    the first inquiry, the text of each of its answers, and a trailing empty
    string.
    """
    if not data:
        return []
    inquiry = data['inquiry'][0]
    lines = [inquiry['title']]
    # extend() instead of a list comprehension run only for its side effects.
    lines.extend(answer['text'] for answer in inquiry['answers'])
    lines.append('')
    return lines
|||
|
|||
|
|||
def render_text(args):
    """Write all polls of a map file to a plain-text file.

    Reads ``inputfile``, ``cachedir``, ``outputfile`` and ``timestamp`` from
    ``args``. The lines produced by ``render_poll`` for every poll are joined
    with newlines and written to the (optionally timestamped) output path.
    Returns ``None``.
    """
    lines = []
    polls = reader.read_polls_sorted_no_dups(args.inputfile, args.cachedir)
    for poll_id, poll_date, poll_data in polls:
        lines.extend(render_poll(poll_id, poll_date, poll_data))

    target = renderer.add_timestamp_to_path(args.outputfile, args.timestamp)
    print('EXPORT', target)
    with open(target, 'w') as output:
        print('\n'.join(lines), file=output)
|||
|
|||
|
|||
def main():
    """Command-line entry point of the plain-text renderer.

    Declares the command-line options, parses ``sys.argv`` and exits the
    process with the value returned by ``render_text``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=(
            'Novinky: Render map file -- mapping poll IDs to'
            ' timestamps -- as an plain text'
        )
    )
    add = parser.add_argument
    add('--input', '-i', dest='inputfile', required=True,
        help='input map file path')
    add('--output', '-o', dest='outputfile', required=True,
        help='output plain text file path')
    add('--cache', '-c', dest='cachedir', required=True,
        help='cache location')
    add('--timestamp', '-t', dest='timestamp', action='store_true',
        help='append current timestamp to the output file name; optional')

    exit_status = render_text(parser.parse_args())
    sys.exit(exit_status)
@ -0,0 +1,67 @@ |
|||
import datetime |
|||
import os.path |
|||
import re |
|||
|
|||
import pystache |
|||
|
|||
|
|||
# strftime() pattern of the suffix that add_timestamp_to_path() inserts into
# a file name: year, month, day, hour, minute, second without separators.
TIMESTAMP_FORMAT = '%Y%m%d%H%M%S'
|||
|
|||
|
|||
def fix_line_breaks(text, char_nbsp):
    """Tie one-character words to the word that follows them.

    Every single non-whitespace character that stands at the very start of
    ``text`` or directly after whitespace, and is followed by a space, has
    that space replaced with ``char_nbsp``.

    Returns the modified copy of ``text``.
    """
    # A lookbehind (instead of consuming the preceding whitespace) keeps the
    # matches from overlapping, so runs of one-letter words ("a b c word")
    # are all handled. The callable replacement inserts ``char_nbsp``
    # literally, without interpreting backslashes as template escapes.
    return re.sub(
        r'(?<!\S)(\S) ',
        lambda match: match.group(1) + char_nbsp,
        text
    )
|||
|
|||
|
|||
def add_timestamp_to_path(path, timestamp):
    """Return ``path``, optionally with the current time before its extension.

    When ``timestamp`` is falsy ``path`` is returned unchanged. Otherwise the
    current local time, formatted with ``TIMESTAMP_FORMAT``, is inserted
    between the path's root and its extension, separated from the root by
    a hyphen.
    """
    if timestamp:
        stem, suffix = os.path.splitext(path)
        stamp = datetime.datetime.now().strftime(TIMESTAMP_FORMAT)
        return '%s-%s%s' % (stem, stamp, suffix)
    return path
|||
|
|||
|
|||
def convert_polls_to_template_context(polls, char_nbsp):
    """Build the template context for an iterable of polls.

    ``polls`` yields ``(id, date, data)`` triples. For each triple one dict
    is produced with these keys:

    * ``date`` -- empty string when ``id`` is ``None``, otherwise the day of
      month (no zero padding), a dot, a space and the month name;
    * ``notEmpty`` -- ``False`` when ``data`` is falsy, ``True`` otherwise;
    * ``title``, ``answers``, ``sumCount`` -- only when ``data`` is truthy,
      taken from ``data['inquiry'][0]``.

    Texts are passed through ``fix_line_breaks`` with ``char_nbsp``.

    Returns ``{'polls': [...]}``.
    """
    c_polls = []
    for poll_id, date, data in polls:
        c_poll = {}
        if poll_id is None:
            c_poll['date'] = ''
        else:
            # '%-d' is a platform-specific strftime extension that is not
            # available everywhere; take the unpadded day from the attribute.
            c_poll['date'] = '{}. {}'.format(date.day, date.strftime('%B'))
        if not data:
            c_poll['notEmpty'] = False
        else:
            inquiry = data['inquiry'][0]
            c_poll['title'] = fix_line_breaks(inquiry['title'], char_nbsp)
            c_poll['answers'] = [
                {
                    'text': fix_line_breaks(answer['text'], char_nbsp),
                    'pc': answer['pc'],
                    # Decimal comma instead of a decimal point.
                    'pcFormatted': str(answer['pc']).replace('.', ','),
                    # Percentage scaled to a 0..1 fraction.
                    'width': round(answer['pc'] / 100, 2),
                    # Alternates 0, 1, 0, 1, ... over the answers.
                    'even': index % 2,
                }
                for index, answer in enumerate(inquiry['answers'])
            ]
            c_poll['sumCount'] = inquiry['sumCount']
            c_poll['notEmpty'] = True
        c_polls.append(c_poll)
    return {
        'polls': c_polls
    }
|||
|
|||
|
|||
def render(polls, template_file_path, char_nbsp):
    """Render ``polls`` through the Mustache template at ``template_file_path``.

    The polls are first converted with ``convert_polls_to_template_context``
    (using ``char_nbsp``); the template file is then read and rendered with
    ``pystache``. Returns the rendered text.
    """
    template_context = convert_polls_to_template_context(polls, char_nbsp)
    with open(template_file_path, 'r') as template_file:
        template = template_file.read()
        return pystache.render(template, template_context)
@ -0,0 +1,97 @@ |
|||
import datetime |
|||
import json |
|||
import os |
|||
import re |
|||
import sys |
|||
|
|||
import requests |
|||
from bs4 import BeautifulSoup |
|||
|
|||
|
|||
# HTTP request headers passed to every requests.get() call in this module.
HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/42.0',
    'Accept':
    'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'DNT': '1',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'Referer': 'http://www.novinky.cz/',
}
# Poll endpoint; the placeholder is filled with the poll ID via URL.format().
URL = 'http://www.novinky.cz/inquiry/screen?inquiryIds={}'
|||
|
|||
|
|||
def cache_read(cache_dir):
    """Load every cached poll response stored directly in ``cache_dir``.

    Only regular files whose name does not start with a dot and has the
    form ``<digits>.json`` are read.

    Returns a dict mapping the digits of the file name (as ``str``) to the
    file's text content (unparsed). A ``cache_dir`` that does not exist yet
    yields an empty dict, because ``cache_store`` only creates the directory
    on the first write.
    """
    try:
        entries = os.scandir(cache_dir)
    except FileNotFoundError:
        return {}

    cache = {}
    for entry in entries:
        if entry.name.startswith('.') or not entry.is_file():
            continue
        m = re.search(r'^(\d+)\.json$', entry.name)
        if not m:
            continue
        with open(entry.path, 'r') as f:
            cache[m.group(1)] = f.read()
    return cache
|||
|
|||
|
|||
def cache_store(cache_dir, id, json_str):
    """Write ``json_str`` plus a trailing newline to ``<cache_dir>/<id>.json``.

    ``cache_dir`` is created when it does not exist. The path of the written
    file is printed to standard output.
    """
    os.makedirs(cache_dir, exist_ok=True)
    file_name = id + '.json'
    target = os.path.join(cache_dir, file_name)
    print(' FILE {}'.format(target))
    with open(target, 'w') as cache_file:
        cache_file.write(str(json_str) + '\n')
|||
|
|||
|
|||
def download_homepage(url):
    """Fetch ``url`` with the module's ``HEADERS`` and return the body text."""
    return requests.get(url, headers=HEADERS).text
|||
|
|||
|
|||
def parse_poll_id(html):
    """Extract the poll ID from a page's HTML.

    The first element with the class ``inquiry`` is looked up and the first
    run of digits in its ``id`` attribute is returned as a string.

    Returns ``None`` when there is no such element, when it has no ``id``
    attribute, or when the attribute contains no digits.
    """
    soup = BeautifulSoup(html, 'html.parser')
    poll = soup.find(class_='inquiry')
    if not poll:
        return None
    try:
        element_id = poll['id']
    except KeyError:
        # Element found, but it carries no id attribute.
        return None
    m = re.search(r'\d+', element_id)
    # Without a guard a digit-less id crashed on ``None.group``.
    return m.group(0) if m else None
|||
|
|||
|
|||
def download_poll_by_id(id, cache_dir, cache=None, force=False):
    """Return the response text for poll ``id``, using the cache if possible.

    * Falsy ``id``: returns ``None``.
    * ``cache`` is a dict of cached responses; when it is ``None`` it is
      loaded with ``cache_read(cache_dir)``.
    * Unless ``force`` is truthy, a cached entry for ``id`` is returned
      without any request.
    * Otherwise the poll is requested from ``URL``; a non-OK status yields
      ``''``, a successful response is stored with ``cache_store`` and
      returned.
    """
    if not id:
        return None

    known = cache_read(cache_dir) if cache is None else cache
    if not (force or id not in known):
        return known[id]

    print('DOWNLOAD {}'.format(id))
    response = requests.get(URL.format(id), headers=HEADERS)
    if response.status_code == requests.codes.ok:
        body = response.text
        cache_store(cache_dir, id, body)
        return body
    return ''
|||
|
|||
|
|||
def download_polls(polls, cache_dir, force=False):
    """Fetch the response for every ``(id, date)`` pair in ``polls``.

    The cache directory is read once and shared by all lookups. Returns a
    list of ``(id, date, response)`` triples, where ``response`` is whatever
    ``download_poll_by_id`` returns for that ID.
    """
    cache = cache_read(cache_dir)
    results = []
    for poll_id, poll_date in polls:
        response = download_poll_by_id(
            poll_id, cache_dir, cache, force=force
        )
        results.append((poll_id, poll_date, response))
    return results
|||
|
|||
|
|||
def download_page_and_parse_poll_id(url):
    """Download ``url``, extract the poll ID from it and return the ID.

    The URL and the ID found (possibly ``None``) are printed to standard
    output.
    """
    html = download_homepage(url)
    poll_id = parse_poll_id(html)
    print('PAGE {} {}'.format(url, poll_id))
    return poll_id
@ -0,0 +1,25 @@ |
|||
{{! Mustache template: emits one "poll" block per entry of the polls list. }}
{{! Each entry provides date and notEmpty; title, answers and sumCount are set only when notEmpty is true. }}
{{! Tags with an ampersand prefix are inserted without HTML escaping. }}
{{#polls}}
<div class="poll">
<div class="poll-head">
<h2>{{&date}}</h2>
{{#notEmpty}}
<p>{{&title}}</p>
{{/notEmpty}}
</div>
{{#notEmpty}}
{{! One bar per answer; even alternates between 0 and 1, pc is the percentage used as the bar width. }}
{{#answers}}
<div class="poll-answer">
<h3>
<a href="javascript:void(0)">{{&text}}</a>
</h3>
<span class="poll-answer-bar item-{{even}}">
<a href="javascript:void(0)" style="width: {{pc}}%"><span class="poll-answer-perc">{{pc}}%</span></a>
</span>
</div>
{{/answers}}
<div class="poll-foot">
<p>Celkem hlasovalo {{sumCount}} čtenářů.</p>
</div>
{{/notEmpty}}
</div>
{{/polls}}
@ -0,0 +1,99 @@ |
|||
{{=(( ))=}}
% Mustache template for the print version. The line above switches the
% Mustache delimiters to double parentheses so that they do not clash with
% TeX braces. Context: a list "polls"; each entry provides "date" and
% "notEmpty" and, when "notEmpty" is true, "title", "answers" and "sumCount".
\documentclass[a4paper,12pt,landscape]{article}
\usepackage{fontspec}
\usepackage{polyglossia}
\setdefaultlanguage{czech}

%\usepackage[top=4cm, bottom=2.5cm, left=4cm, right=4cm]{geometry}
\usepackage[margin=1cm]{geometry}
\pagestyle{empty}

% Sans-serif font, made the document default below.
\setsansfont{Roboto}[
UprightFont = * Light,
ItalicFont = * Light Italic,
BoldFont = * Medium,
]
%\setsansfont{OpenSans}[
% UprightFont = *,
% ItalicFont = * Italic,
% BoldFont = * Bold,
%]
\renewcommand*{\familydefault}{\sfdefault}

\usepackage{tikz}
\usetikzlibrary{patterns}

% Fill pattern "section": a diagonal stroke repeated on a 4pt tile; used to
% fill the answer bars.
\pgfdeclarepatternformonly{section}
{\pgfqpoint{-1pt}{-1pt}}
{\pgfqpoint{4pt}{4pt}}
{\pgfqpoint{4pt}{4pt}}{
\pgfsetlinewidth{0.4pt}
\pgfpathmoveto{\pgfqpoint{0pt}{0pt}}
\pgfpathlineto{\pgfqpoint{4pt}{4pt}}
\pgfusepath{stroke}
}

\begin{document}

\setlength{\parindent}{0pt}
\sffamily

% One page per poll: a tall box with title and answers, then a thin footer
% box, then \clearpage.
((#polls))
\begin{minipage}[h][0.98\textheight][t]{\textwidth}

((#notEmpty))
\vspace{0.6\baselineskip}

\begingroup

\fontsize{36pt}{40pt}\selectfont\raggedright
((&title))

\endgroup

\vspace{1.2\baselineskip}

((#answers))
\begingroup

\fontsize{22pt}{24pt}\selectfont\raggedright

\vspace{0.5\baselineskip}

((&text))

\vspace{0.2\baselineskip}

\fontsize{16pt}{16pt}\selectfont
% Bar whose length is the "width" fraction of the text width, followed by
% the formatted percentage.
\begin{tikzpicture}
\draw[pattern=section] (0,0) rectangle ( ((&width))\textwidth,0.75em );
\end{tikzpicture}
~((&pcFormatted))~\%

\endgroup
((/answers))
((/notEmpty))

\end{minipage}

% Footer: vote count (only for non-empty polls) on the left, date on the right.
\begin{minipage}[h][0.02\textheight][t]{\textwidth}

\begingroup

\fontsize{16pt}{16pt}\selectfont

((#notEmpty))
Celkem hlasovalo ((&sumCount)) čtenářů.
((/notEmpty))
\hfill
\emph{((&date))}

\endgroup

\end{minipage}

\clearpage

((/polls))

\end{document}
@ -0,0 +1,25 @@ |
|||
#!/bin/sh
# Download every poll in the ID range below into "<id>.json" in the current
# directory. IDs whose file already exists are skipped.

# POSIX sh has no {a..b} brace expansion, so count with a while loop.
i=13677
last=14645
while [ "$i" -le "$last" ]
do
    if [ ! -f "$i.json" ]
    then
        echo "$i.json"
        # Request the poll of the loop counter ($i). The script takes no
        # positional arguments, so "$1" was always empty here.
        curl "http://www.novinky.cz/inquiry/screen?inquiryIds=$i" \
            -H 'Host: www.novinky.cz' \
            -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0' \
            -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
            -H 'Accept-Language: en-US,en;q=0.5' \
            --compressed \
            -H 'DNT: 1' \
            -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \
            -H 'Pragma: no-cache' \
            -H 'Cache-Control: no-cache' \
            -H 'Referer: http://www.novinky.cz/' \
            -H 'Connection: keep-alive' \
            > "$i.json"
    fi
    i=$((i + 1))
done
|||
|
|||
# -H 'Cookie: __gfp_64b=1YxgAnGXDRc8PfICVru3Uuiuq81RYkx7BQB6_pEHe8X.U7; _chartbeat2=xwz2BBKJrbXD432hm.1439920925967.1439920925967.1; AppPromoCarouselIndex=1; ds=; inquiry=INQUIRY:|14592=52990; inquiryCSRFtoken=NWZ5DNE2H4HVD81H6EFG9N992W9ANS27F3CWO4WA' |
|||
# --data 'inquiryIds=14592' |
@ -0,0 +1,18 @@ |
|||
#!/bin/sh
# Download a single poll into "<id>.json" in the current directory.
# Usage: <script> POLL_ID

# Without an ID the request would carry an empty inquiryIds parameter and the
# response would be written to a file named ".json".
if [ -z "$1" ]
then
    echo "usage: $0 POLL_ID" >&2
    exit 1
fi

curl "http://www.novinky.cz/inquiry/screen?inquiryIds=$1" \
    -H 'Host: www.novinky.cz' \
    -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0' \
    -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' \
    -H 'Accept-Language: en-US,en;q=0.5' \
    --compressed \
    -H 'DNT: 1' \
    -H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \
    -H 'Pragma: no-cache' \
    -H 'Cache-Control: no-cache' \
    -H 'Referer: http://www.novinky.cz/' \
    -H 'Connection: keep-alive' \
    > "$1.json"
|||
|
|||
# -H 'Cookie: __gfp_64b=1YxgAnGXDRc8PfICVru3Uuiuq81RYkx7BQB6_pEHe8X.U7; _chartbeat2=xwz2BBKJrbXD432hm.1439920925967.1439920925967.1; AppPromoCarouselIndex=1; ds=; inquiry=INQUIRY:|14592=52990; inquiryCSRFtoken=NWZ5DNE2H4HVD81H6EFG9N992W9ANS27F3CWO4WA' |
|||
# --data 'inquiryIds=14592' |
@ -0,0 +1,60 @@ |
|||
from setuptools import setup, find_packages |
|||
from codecs import open |
|||
from os import path |
|||
|
|||
# Directory containing this setup script.
here = path.abspath(path.dirname(__file__))

# The README next to this script doubles as the long description.
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='novinky_polls',

    version='1.0.0',

    description='Tools to work with Novinky.cz\'s polls',
    long_description=long_description,

    url='https://lab.saloun.cz/jakub/novinky-polls',

    author='Jakub Valenta',
    author_email='jakub@jakubvalenta.cz',

    license='Apache Software License',

    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Topic :: Artistic Software',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3',
    ],

    keywords='',

    packages=find_packages(),

    # Runtime dependencies.
    install_requires=[
        'requests',
        'beautifulsoup4',
        'pystache',
    ],

    # Non-Python files shipped inside the package.
    package_data={
        'novinky_polls': [
            'templates/*',
            'test/*',
        ],
    },

    # Command-line tools, each mapped to the main() of one module.
    entry_points={
        'console_scripts': [
            'novinky-polls-add-archive-org=novinky_polls.add_archive_org:main',
            'novinky-polls-add-current=novinky_polls.add_current:main',
            'novinky-polls-analyze=novinky_polls.analyze:main',
            'novinky-polls-render-html=novinky_polls.render_html:main',
            'novinky-polls-render-print=novinky_polls.render_print:main',
            'novinky-polls-render-text=novinky_polls.render_text:main',
        ],
    },
)
Loading…
Reference in new issue