Mirror of
https://github.com/TheAlgorithms/Python.git,
synced 2025-07-05 17:34:49 +08:00.
Lukazlim: Replace dependency requests
with httpx
(#12744)
* Replace dependency `requests` with `httpx`. Fixes #12742. Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> * Updating DIRECTORY.md. * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci --------- Signed-off-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> Co-authored-by: Lim, Lukaz Wei Hwang <lukaz.wei.hwang.lim@intel.com> Co-authored-by: cclauss <cclauss@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@ -1,5 +1,12 @@
|
||||
"""Get the site emails from URL."""
|
||||
|
||||
# /// script
|
||||
# requires-python = ">=3.13"
|
||||
# dependencies = [
|
||||
# "httpx",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__author__ = "Muhammad Umer Farooq"
|
||||
@ -13,7 +20,7 @@ import re
|
||||
from html.parser import HTMLParser
|
||||
from urllib import parse
|
||||
|
||||
import requests
|
||||
import httpx
|
||||
|
||||
|
||||
class Parser(HTMLParser):
|
||||
@ -72,7 +79,7 @@ def emails_from_url(url: str = "https://github.com") -> list[str]:
|
||||
|
||||
try:
|
||||
# Open URL
|
||||
r = requests.get(url, timeout=10)
|
||||
r = httpx.get(url, timeout=10, follow_redirects=True)
|
||||
|
||||
# pass the raw HTML to the parser to get links
|
||||
parser.feed(r.text)
|
||||
@ -81,9 +88,15 @@ def emails_from_url(url: str = "https://github.com") -> list[str]:
|
||||
valid_emails = set()
|
||||
for link in parser.urls:
|
||||
# open URL.
|
||||
# read = requests.get(link)
|
||||
# Check if the link is already absolute
|
||||
if not link.startswith("http://") and not link.startswith("https://"):
|
||||
# Prepend protocol only if link starts with domain, normalize otherwise
|
||||
if link.startswith(domain):
|
||||
link = f"https://{link}"
|
||||
else:
|
||||
link = parse.urljoin(f"https://{domain}", link)
|
||||
try:
|
||||
read = requests.get(link, timeout=10)
|
||||
read = httpx.get(link, timeout=10, follow_redirects=True)
|
||||
# Get the valid email.
|
||||
emails = re.findall("[a-zA-Z0-9]+@" + domain, read.text)
|
||||
# If not in list then append it.
|
||||
|
Reference in New Issue
Block a user