Mirror of
https://github.com/TheAlgorithms/Python.git,
synced 2025-07-05 17:34:49 +08:00.
Lukazlim: Replace dependency requests
with httpx
(#12744)
* Replace dependency `requests` with `httpx`. Fixes #12742. Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> * Updating DIRECTORY.md. * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci --------- Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> Co-authored-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> Co-authored-by: cclauss <cclauss@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@ -1,5 +1,12 @@
|
||||
"""Get the site emails from URL."""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.13"
|
||||
# dependencies = [
|
||||
# "httpx",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__author__ = "Muhammad Umer Farooq"
|
||||
@ -13,7 +20,7 @@ import re
|
||||
from html.parser import HTMLParser
|
||||
from urllib import parse
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
|
||||
class Parser(HTMLParser):
|
||||
@ -72,7 +79,7 @@ def emails_from_url(url: str = "https://github.com") -> list[str]:
|
||||
|
||||
try:
|
||||
# Open URL
|
||||
r = requests.get(url, timeout=10)
|
||||
r = httpx.get(url, timeout=10, follow_redirects=True)
|
||||
|
||||
# pass the raw HTML to the parser to get links
|
||||
parser.feed(r.text)
|
||||
@ -81,9 +88,15 @@ def emails_from_url(url: str = "https://github.com") -> list[str]:
|
||||
valid_emails = set()
|
||||
for link in parser.urls:
|
||||
# open URL.
|
||||
# read = requests.get(link)
|
||||
# Check if the link is already absolute
|
||||
if not link.startswith("http://") and not link.startswith("https://"):
|
||||
# Prepend protocol only if link starts with domain, normalize otherwise
|
||||
if link.startswith(domain):
|
||||
link = f"https://{link}"
|
||||
else:
|
||||
link = parse.urljoin(f"https://{domain}", link)
|
||||
try:
|
||||
read = requests.get(link, timeout=10)
|
||||
read = httpx.get(link, timeout=10, follow_redirects=True)
|
||||
# Get the valid email.
|
||||
emails = re.findall("[a-zA-Z0-9]+@" + domain, read.text)
|
||||
# If not in list then append it.
|
||||
|
Reference in New Issue
Block a user