mirror of
https://codeberg.org/hyperreal/print-wp-sources
synced 2025-01-18 06:13:44 +01:00
Use article URL and include Wikinews
This commit is contained in:
parent
58de195098
commit
835f4eb037
28
README.md
28
README.md
@ -1,19 +1,33 @@
|
|||||||
# print-wp-sources
|
# print-wp-sources
|
||||||
|
|
||||||
This program just prints the sources of the given Wikipedia article to standard output.
|
This program just prints the sources of the given Wikipedia or Wikinews article to standard output.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
``` shell
|
``` shell
|
||||||
pipx install print-wp-sources --include-deps
|
pipx install print-wp-sources
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Use the Wikipedia article's name as the argument to `print-wp-sources`. For example, if the article's URL is `https://en.wikipedia.org/wiki/Automatic_negative_thoughts`, then the argument for `print-wp-sources` would be `"Automatic_negative_thoughts"`.
|
Use the full URL of the Wikipedia or Wikinews article as the argument to `print-wp-sources`.
|
||||||
|
|
||||||
``` shell
|
``` shell
|
||||||
print-wp-sources "Automatic_negative_thoughts"
|
print-wp-sources "https://en.wikinews.org/wiki/Israel-Lebanon_ceasefire_faces_several_violations"
|
||||||
|
|
||||||
|
Output:
|
||||||
|
https://en.wikinews.org/w/index.php?title=Special:Log&type=review&page=Israel-Lebanon_ceasefire_faces_several_violations
|
||||||
|
https://en.wikinews.org/w/index.php?title=Israel-Lebanon_ceasefire_faces_several_violations&action=info#mw-flaggedrevs-action-info-pages-waiting-for-review
|
||||||
|
https://thedefensepost.com/2024/12/06/israel-strikes-hezbollah-smuggling-routes/
|
||||||
|
https://www.cbsnews.com/news/israel-war-palestinians-ceasefire-hezbollah-lebanon-strained-by-strikes/
|
||||||
|
https://www.cbsnews.com/news/israel-hezbollah-ceasefire-claims-of-violations-on-day-2-war-hamas-gaza/
|
||||||
|
https://theconversation.com/why-israel-and-hezbollah-reached-a-ceasefire-now-and-what-it-means-for-israel-lebanon-biden-and-trump-244700
|
||||||
|
https://www.abc.net.au/news/2024-11-25/lebanon-ceasefire-pending-israeli-response/104642856
|
||||||
|
https://www.aljazeera.com/news/liveblog/2024/11/25/live-destruction-in-tel-aviv-beirut-amid-hezbollah-israel-missile-fire
|
||||||
|
```
|
||||||
|
|
||||||
|
```shell
|
||||||
|
print-wp-sources "https://en.wikipedia.org/wiki/Automatic_negative_thoughts"
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
https://pubmed.ncbi.nlm.nih.gov/26431418
|
https://pubmed.ncbi.nlm.nih.gov/26431418
|
||||||
@ -27,11 +41,11 @@ https://pubmed.ncbi.nlm.nih.gov/6630686
|
|||||||
https://wikimediafoundation.org/
|
https://wikimediafoundation.org/
|
||||||
```
|
```
|
||||||
|
|
||||||
> Note: make sure to use quotes around the article name in the argument to `print-wp-sources`.
|
> Note: make sure to use quotes around the article URL in the argument to `print-wp-sources`.
|
||||||
|
|
||||||
One can also easily redirect or pipe the output to a file.
|
One can also easily redirect or pipe the output to a file.
|
||||||
|
|
||||||
``` shell
|
``` shell
|
||||||
print-wp-sources "Automatic_negative_thoughts" > sources.txt
|
print-wp-sources "https://en.wikipedia.org/wiki/Automatic_negative_thoughts" > sources.txt
|
||||||
print-wp-sources "Python_(programming_language)" | tee sources.txt
|
print-wp-sources "https://en.wikipedia.org/wiki/Python_(programming_language)" | tee sources.txt
|
||||||
```
|
```
|
||||||
|
@ -1,20 +1,21 @@
|
|||||||
import sys
|
import sys
|
||||||
from urllib.parse import unquote
|
|
||||||
from urllib.request import urlopen
|
|
||||||
|
|
||||||
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
exit("Usage: print-wp-sources ARTICLE_NAME")
|
exit("Usage: print-wp-sources ARTICLE_URL")
|
||||||
article = sys.argv[1]
|
article_url = sys.argv[1]
|
||||||
|
|
||||||
url = f"https://en.wikipedia.org/wiki/{article}"
|
text = requests.get(article_url).text
|
||||||
text = urlopen(url).read()
|
|
||||||
soup = BeautifulSoup(text, "html.parser")
|
soup = BeautifulSoup(text, "html.parser")
|
||||||
for link in soup.find_all("a", attrs={"class": "external text"}):
|
for link in soup.find_all("a", attrs={"class": "external text"}):
|
||||||
print(unquote(link.get("href")))
|
if "wikimediafoundation.org" not in link.get(
|
||||||
|
"href"
|
||||||
|
) and "foundation.wikimedia.org" not in link.get("href"):
|
||||||
|
print(link.get("href"))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "print-wp-sources"
|
name = "print-wp-sources"
|
||||||
version = "0.4"
|
version = "0.5"
|
||||||
authors = [
|
authors = [
|
||||||
{ name="Jeffrey Serio", email="hyperreal@fedoraproject.org" },
|
{ name="Jeffrey Serio", email="hyperreal@fedoraproject.org" },
|
||||||
]
|
]
|
||||||
description = "Print sources from Wikipedia articles to stdout."
|
description = "Print sources from Wikipedia or Wikinews articles to stdout."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
classifiers = [
|
classifiers = [
|
||||||
@ -12,7 +12,7 @@ classifiers = [
|
|||||||
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
]
|
]
|
||||||
dependencies = ["beautifulsoup4>=4.12.3"]
|
dependencies = ["beautifulsoup4>=4.12.3", "requests>=2.32.3"]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
print-wp-sources = "print_wp_sources:main"
|
print-wp-sources = "print_wp_sources:main"
|
||||||
|
Loading…
Reference in New Issue
Block a user