From 86031e12a3543f9e2cabfea162d349fab7387224 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Fri, 24 May 2024 20:30:53 +0530 Subject: [PATCH 01/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f448177..ffa0441 100644 --- a/main.py +++ b/main.py @@ -26,7 +26,7 @@ START_BTN = InlineKeyboardMarkup( [[ - InlineKeyboardButton('Source Code', url='https://github.com/samadii/WebDownloaderBot'), + InlineKeyboardButton('My Father', url='https://t.me/Matiz_Owner'), ]] ) From a944b101dfbe96de2b383c86ffc44da01d91f8ca Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Fri, 24 May 2024 20:37:08 +0530 Subject: [PATCH 02/66] Update web_dl.py --- web_dl.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/web_dl.py b/web_dl.py index 37a9e3e..722bb60 100644 --- a/web_dl.py +++ b/web_dl.py @@ -5,8 +5,6 @@ from bs4 import BeautifulSoup -#----------------------------------------------------------------------------- -#----------------------------------------------------------------------------- class urlDownloader(object): """ Download the webpage components base on the input url.""" def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True): @@ -14,25 +12,28 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True): self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg - self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml','js') + self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.session = requests.Session() - #----------------------------------------------------------------------------- def savePage(self, url, pagefolder='page'): """ Save the web page components based on the input url and dir name. Args: url ([try]): web url string. pagefolder (str, optional): path to save the web components. Returns: - [bool]: whether the components saved the successfully. + [bool]: whether the components saved successfully. """ try: response = self.session.get(url) self.soup = BeautifulSoup(response.text, features="lxml") - if not os.path.exists(pagefolder): os.mkdir(pagefolder) - if self.imgFlg: self._soupfindnSave(url, pagefolder, tag2find='img', inner='src') - if self.linkFlg: self._soupfindnSave(url, pagefolder, tag2find='link', inner='href') - if self.scriptFlg: self._soupfindnSave(url, pagefolder, tag2find='script', inner='src') + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + if self.imgFlg: + self._soupfindnSave(url, pagefolder, tag2find='img', inner='src') + if self.linkFlg: + self._soupfindnSave(url, pagefolder, tag2find='link', inner='href') + if self.scriptFlg: + self._soupfindnSave(url, pagefolder, tag2find='script', inner='src') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) return True @@ -40,16 +41,17 @@ def savePage(self, url, pagefolder='page'): print("> savePage(): Create files failed: %s." % str(e)) return False - #----------------------------------------------------------------------------- def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): """ Saves on specified pagefolder all tag2find objects. """ pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): os.mkdir(pagefolder) - for res in self.soup.findAll(tag2find): # images, css, etc.. 
+ if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + for res in self.soup.findAll(tag2find): # images, css, etc.. try: - if not res.has_attr(inner): continue # check if inner tag (file object) exists + if not res.has_attr(inner): + continue # check if inner tag (file object) exists # clean special chars such as '@, # ? <>' - filename = re.sub('\W+', '.', os.path.basename(res[inner])) + filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) # print("> filename:", filename) # Added the '.html' for the html file in the href if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): @@ -62,8 +64,7 @@ def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): if not os.path.isfile(filepath): with open(filepath, 'wb') as file: filebin = self.session.get(fileurl) - if len(filebin.content) > 0: # filter the empty file(imge not found) + if len(filebin.content) > 0: # filter the empty file(imge not found) file.write(filebin.content) except Exception as exc: print(exc, file=sys.stderr) - From 7ac4154408b526d4aff12b39058a492d5b2b78fb Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Fri, 24 May 2024 20:44:36 +0530 Subject: [PATCH 03/66] Update web_dl.py --- web_dl.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/web_dl.py b/web_dl.py index 722bb60..939a033 100644 --- a/web_dl.py +++ b/web_dl.py @@ -4,9 +4,8 @@ from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup - class urlDownloader(object): - """ Download the webpage components base on the input url.""" + """ Download the webpage components based on the input URL.""" def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True): self.soup = None self.imgFlg = imgFlg @@ -16,12 +15,12 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True): self.session = requests.Session() def savePage(self, url, pagefolder='page'): - """ Save the web page components based on the input url and dir name. + """ Save the web page components based on the input URL and dir name. Args: - url ([try]): web url string. + url (str): web URL string. pagefolder (str, optional): path to save the web components. Returns: - [bool]: whether the components saved successfully. + bool: whether the components saved successfully. """ try: response = self.session.get(url) @@ -38,7 +37,7 @@ def savePage(self, url, pagefolder='page'): file.write(self.soup.prettify('utf-8')) return True except Exception as e: - print("> savePage(): Create files failed: %s." 
% str(e)) + print(f"> savePage(): Create files failed: {str(e)}.") return False def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): @@ -64,7 +63,7 @@ def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): if not os.path.isfile(filepath): with open(filepath, 'wb') as file: filebin = self.session.get(fileurl) - if len(filebin.content) > 0: # filter the empty file(imge not found) + if len(filebin.content) > 0: # filter the empty file (image not found) file.write(filebin.content) except Exception as exc: print(exc, file=sys.stderr) From 9c0ae8d1fb874ea1ff58fbbe7c478f16d8b09f78 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:01:07 +0530 Subject: [PATCH 04/66] Update web_dl.py --- web_dl.py | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/web_dl.py b/web_dl.py index 939a033..b5dbdfd 100644 --- a/web_dl.py +++ b/web_dl.py @@ -3,6 +3,7 @@ import requests from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup +from tqdm import tqdm class urlDownloader(object): """ Download the webpage components based on the input URL.""" @@ -41,29 +42,29 @@ def savePage(self, url, pagefolder='page'): return False def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): - """ Saves on specified pagefolder all tag2find objects. """ - pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - for res in self.soup.findAll(tag2find): # images, css, etc.. - try: - if not res.has_attr(inner): - continue # check if inner tag (file object) exists - # clean special chars such as '@, # ? <>' - filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - # print("> filename:", filename) - # Added the '.html' for the html file in the href - if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): - filename += '.html' - fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(pagefolder, filename) - # rename html ref so can move html and folder of files anywhere - res[inner] = os.path.join(os.path.basename(pagefolder), filename) - # create the file. - if not os.path.isfile(filepath): - with open(filepath, 'wb') as file: - filebin = self.session.get(fileurl) - if len(filebin.content) > 0: # filter the empty file (image not found) - file.write(filebin.content) - except Exception as exc: - print(exc, file=sys.stderr) + """ Saves on specified pagefolder all tag2find objects. """ + pagefolder = os.path.join(pagefolder, tag2find) + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + elements = self.soup.findAll(tag2find) + for res in tqdm(elements, desc=f"Downloading {tag2find}"): + try: + if not res.has_attr(inner): + continue # check if inner tag (file object) exists + # clean special chars such as '@, # ? <>' + filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) + # Added the '.html' for the html file in the href + if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): + filename += '.html' + fileurl = urljoin(url, res.get(inner)) + filepath = os.path.join(pagefolder, filename) + # rename html ref so can move html and folder of files anywhere + res[inner] = os.path.join(os.path.basename(pagefolder), filename) + # create the file. 
+ if not os.path.isfile(filepath): + with open(filepath, 'wb') as file: + filebin = self.session.get(fileurl) + if len(filebin.content) > 0: # filter the empty file (image not found) + file.write(filebin.content) + except Exception as exc: + print(exc, file=sys.stderr) From 3775d7051293cda9e1f1f784ec85ddb95f48986c Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:01:27 +0530 Subject: [PATCH 05/66] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index dc831f5..cf1fab6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ requests lxml urllib3 bs4 +tqdm From 498e3bb0eceac9a9d0ac781a4ba21876d3015971 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:04:31 +0530 Subject: [PATCH 06/66] Update web_dl.py --- web_dl.py | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/web_dl.py b/web_dl.py index b5dbdfd..74f2418 100644 --- a/web_dl.py +++ b/web_dl.py @@ -3,7 +3,8 @@ import requests from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup -from tqdm import tqdm +from tqdm import tqdm # For progress indicator + class urlDownloader(object): """ Download the webpage components based on the input URL.""" @@ -42,29 +43,29 @@ def savePage(self, url, pagefolder='page'): return False def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): - """ Saves on specified pagefolder all tag2find objects. """ - pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - elements = self.soup.findAll(tag2find) - for res in tqdm(elements, desc=f"Downloading {tag2find}"): - try: - if not res.has_attr(inner): - continue # check if inner tag (file object) exists - # clean special chars such as '@, # ? <>' - filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - # Added the '.html' for the html file in the href - if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): - filename += '.html' - fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(pagefolder, filename) - # rename html ref so can move html and folder of files anywhere - res[inner] = os.path.join(os.path.basename(pagefolder), filename) - # create the file. - if not os.path.isfile(filepath): - with open(filepath, 'wb') as file: - filebin = self.session.get(fileurl) - if len(filebin.content) > 0: # filter the empty file (image not found) - file.write(filebin.content) - except Exception as exc: - print(exc, file=sys.stderr) + """ Saves on specified pagefolder all tag2find objects. """ + pagefolder = os.path.join(pagefolder, tag2find) + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + elements = self.soup.findAll(tag2find) + for res in tqdm(elements, desc=f"Downloading {tag2find}"): + try: + if not res.has_attr(inner): + continue # check if inner tag (file object) exists + # clean special chars such as '@, # ? 
<>' + filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) + # Added the '.html' for the html file in the href + if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): + filename += '.html' + fileurl = urljoin(url, res.get(inner)) + filepath = os.path.join(pagefolder, filename) + # rename html ref so can move html and folder of files anywhere + res[inner] = os.path.join(os.path.basename(pagefolder), filename) + # create the file. + if not os.path.isfile(filepath): + with open(filepath, 'wb') as file: + filebin = self.session.get(fileurl) + if len(filebin.content) > 0: # filter the empty file (image not found) + file.write(filebin.content) + except Exception as exc: + print(exc, file=sys.stderr) From 654276b5caed52adca3669a39ea3f9823bb70817 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:15:39 +0530 Subject: [PATCH 07/66] Update requirements.txt From 5cee0c3d503940fbdce9c54c305b5ca982145384 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:16:32 +0530 Subject: [PATCH 08/66] Update web_dl.py --- web_dl.py | 84 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/web_dl.py b/web_dl.py index 74f2418..b73bebb 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,71 +1,87 @@ -import os, sys +import os import re +import sys import requests from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup -from tqdm import tqdm # For progress indicator - +from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor class urlDownloader(object): - """ Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True): + """Download the webpage components based on the input URL.""" + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg + self.file_size_limit = file_size_limit + self.max_retries = max_retries self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.session = requests.Session() - + self.summary = { + 'images': 0, + 'links': 0, + 'scripts': 0 + } + def savePage(self, url, pagefolder='page'): - """ Save the web page components based on the input URL and dir name. - Args: - url (str): web URL string. - pagefolder (str, optional): path to save the web components. - Returns: - bool: whether the components saved successfully. 
- """ + """Save the web page components based on the input URL and dir name.""" try: response = self.session.get(url) self.soup = BeautifulSoup(response.text, features="lxml") if not os.path.exists(pagefolder): os.mkdir(pagefolder) if self.imgFlg: - self._soupfindnSave(url, pagefolder, tag2find='img', inner='src') + self._soupfindnSave(url, pagefolder, tag2find='img', inner='src', category='images') if self.linkFlg: - self._soupfindnSave(url, pagefolder, tag2find='link', inner='href') + self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') if self.scriptFlg: - self._soupfindnSave(url, pagefolder, tag2find='script', inner='src') + self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - return True + summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." + return True, summary except Exception as e: - print(f"> savePage(): Create files failed: {str(e)}.") - return False + print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) + return False, None - def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src'): - """ Saves on specified pagefolder all tag2find objects. """ + def _download_file(self, fileurl, filepath): + """Download a file with retry mechanism.""" + for attempt in range(self.max_retries): + try: + filebin = self.session.get(fileurl, stream=True) + filebin.raise_for_status() + if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: + print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) + return False + with open(filepath, 'wb') as file: + for chunk in filebin.iter_content(chunk_size=8192): + if chunk: + file.write(chunk) + return True + except requests.RequestException as exc: + print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) + return False + + def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): + """Saves on specified pagefolder all tag2find objects.""" pagefolder = os.path.join(pagefolder, tag2find) if not os.path.exists(pagefolder): os.mkdir(pagefolder) elements = self.soup.findAll(tag2find) - for res in tqdm(elements, desc=f"Downloading {tag2find}"): - try: + with ThreadPoolExecutor(max_workers=4) as executor: + futures = [] + for res in tqdm(elements, desc=f"Downloading {tag2find}"): if not res.has_attr(inner): - continue # check if inner tag (file object) exists - # clean special chars such as '@, # ? <>' + continue filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - # Added the '.html' for the html file in the href if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): filename += '.html' fileurl = urljoin(url, res.get(inner)) filepath = os.path.join(pagefolder, filename) - # rename html ref so can move html and folder of files anywhere res[inner] = os.path.join(os.path.basename(pagefolder), filename) - # create the file. 
if not os.path.isfile(filepath): - with open(filepath, 'wb') as file: - filebin = self.session.get(fileurl) - if len(filebin.content) > 0: # filter the empty file (image not found) - file.write(filebin.content) - except Exception as exc: - print(exc, file=sys.stderr) + futures.append(executor.submit(self._download_file, fileurl, filepath)) + for future in futures: + if future.result(): + self.summary[category] += 1 From fa0e3d9520a0237efe9ab54c5e4843d4f18b503a Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:17:39 +0530 Subject: [PATCH 09/66] Update main.py --- main.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/main.py b/main.py index ffa0441..e0e13ff 100644 --- a/main.py +++ b/main.py @@ -10,9 +10,9 @@ Bot = Client( "WebDL-Bot", - bot_token = BOT_TOKEN, - api_id = API_ID, - api_hash = API_HASH + bot_token=BOT_TOKEN, + api_id=API_ID, + api_hash=API_HASH ) START_TXT = """ @@ -25,11 +25,10 @@ """ START_BTN = InlineKeyboardMarkup( - [[ - InlineKeyboardButton('My Father', url='https://t.me/Matiz_Owner'), - ]] - ) - + [[ + InlineKeyboardButton('Source Code', url='https://github.com/samadii/WebDownloaderBot'), + ]] +) @Bot.on_message(filters.command(["start"])) async def start(bot, update): @@ -41,33 +40,27 @@ async def start(bot, update): reply_markup=reply_markup ) - - - @Bot.on_message(filters.private & filters.text & ~filters.regex('/start')) async def webdl(_, m): - if not m.text.startswith('http'): - return await m.reply("the URL must start with 'http' or 'https'") + return await m.reply("The URL must start with 'http' or 'https'") - msg = await m.reply('Processing..') + msg = await m.reply('Processing...') url = m.text name = dir = str(m.chat.id) if not os.path.isdir(dir): os.makedirs(dir) - obj = urlDownloader(imgFlg=True, linkFlg=True, scriptFlg=True) - res = obj.savePage(url, dir) + obj = urlDownloader(imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=10*1024*1024) + res, summary = obj.savePage(url, dir) if not res: - return await msg.edit_text('something went wrong!') + return await msg.edit_text('Something went wrong!') shutil.make_archive(name, 'zip', base_dir=dir) - await m.reply_document(name+'.zip') + await m.reply_document(name+'.zip', caption=summary) await msg.delete() shutil.rmtree(dir) os.remove(name+'.zip') - - Bot.run() From 1f4f57b82187a82c3032755a4b376d4ceaa15a04 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:29:12 +0530 Subject: [PATCH 10/66] Update main.py --- main.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index e0e13ff..7d227fd 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,10 @@ import os import shutil -from web_dl import urlDownloader +import requests from pyrogram import Client, filters from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton +from web_dl import urlDownloader +import asyncio BOT_TOKEN = os.environ.get("BOT_TOKEN") API_ID = os.environ.get("API_ID") @@ -20,8 +22,8 @@ I can download all the components (.html, .css, img, xml, video, javascript..) from URLs. -Send any URL, -for ex: 'https://www.google.com' +Send any URL, optionally with the components you want to download. 
For example: +'https://www.google.com img,css,script' """ START_BTN = InlineKeyboardMarkup( @@ -40,18 +42,45 @@ async def start(bot, update): reply_markup=reply_markup ) +def parse_components(text): + components = text.split()[1:] + imgFlg = 'img' in components + linkFlg = 'css' in components + scriptFlg = 'script' in components + return imgFlg, linkFlg, scriptFlg + +def is_valid_url(url): + try: + response = requests.head(url, timeout=5) + return response.status_code == 200 + except requests.RequestException: + return False + +async def send_progress(msg, chat_id, initial_text): + for i in range(10): + await asyncio.sleep(1) + await Bot.edit_message_text(chat_id=chat_id, message_id=msg.message_id, text=f"{initial_text}\nProgress: {i*10}%") + @Bot.on_message(filters.private & filters.text & ~filters.regex('/start')) async def webdl(_, m): - if not m.text.startswith('http'): + parts = m.text.split() + url = parts[0] + + if not url.startswith('http'): return await m.reply("The URL must start with 'http' or 'https'") + if not is_valid_url(url): + return await m.reply("The URL is invalid or inaccessible") + msg = await m.reply('Processing...') - url = m.text + asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) + + imgFlg, linkFlg, scriptFlg = parse_components(m.text) name = dir = str(m.chat.id) if not os.path.isdir(dir): os.makedirs(dir) - obj = urlDownloader(imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=10*1024*1024) + obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024) res, summary = obj.savePage(url, dir) if not res: return await msg.edit_text('Something went wrong!') From c7d76a61d4713713550c45a48b00a6633053003f Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:31:49 +0530 Subject: [PATCH 11/66] Update web_dl.py --- web_dl.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web_dl.py b/web_dl.py index b73bebb..77c486b 100644 --- a/web_dl.py +++ b/web_dl.py @@ -2,20 +2,21 @@ import re import sys import requests -from urllib.parse import urljoin, urlparse +from urllib.parse import urljoin from bs4 import BeautifulSoup from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor class urlDownloader(object): """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg self.file_size_limit = file_size_limit self.max_retries = max_retries + self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.session = requests.Session() self.summary = { @@ -27,7 +28,7 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=No def savePage(self, url, pagefolder='page'): """Save the web page components based on the input URL and dir name.""" try: - response = self.session.get(url) + response = self.session.get(url, auth=self.auth) self.soup = BeautifulSoup(response.text, features="lxml") if not os.path.exists(pagefolder): os.mkdir(pagefolder) @@ -49,7 +50,7 @@ def _download_file(self, fileurl, filepath): """Download a file with retry mechanism.""" for attempt in range(self.max_retries): try: - filebin = self.session.get(fileurl, stream=True) + 
filebin = self.session.get(fileurl, stream=True, auth=self.auth) filebin.raise_for_status() if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) From 59ebbfcfafd22196983b7236adb711aa12721da6 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:33:21 +0530 Subject: [PATCH 12/66] Update main.py --- main.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/main.py b/main.py index 7d227fd..1dfaa57 100644 --- a/main.py +++ b/main.py @@ -4,6 +4,7 @@ from pyrogram import Client, filters from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton from web_dl import urlDownloader +from auth import add_credentials, get_credentials, remove_credentials import asyncio BOT_TOKEN = os.environ.get("BOT_TOKEN") @@ -24,6 +25,8 @@ Send any URL, optionally with the components you want to download. For example: 'https://www.google.com img,css,script' + +Use /auth to add your authentication credentials. """ START_BTN = InlineKeyboardMarkup( @@ -42,26 +45,11 @@ async def start(bot, update): reply_markup=reply_markup ) -def parse_components(text): - components = text.split()[1:] - imgFlg = 'img' in components - linkFlg = 'css' in components - scriptFlg = 'script' in components - return imgFlg, linkFlg, scriptFlg - -def is_valid_url(url): - try: - response = requests.head(url, timeout=5) - return response.status_code == 200 - except requests.RequestException: - return False - -async def send_progress(msg, chat_id, initial_text): - for i in range(10): - await asyncio.sleep(1) - await Bot.edit_message_text(chat_id=chat_id, message_id=msg.message_id, text=f"{initial_text}\nProgress: {i*10}%") - -@Bot.on_message(filters.private & filters.text & ~filters.regex('/start')) +@Bot.on_message(filters.command(["auth"])) +async def auth(bot, update): + await update.reply_text("Please send your username and password in the format 'username:password'") + +@Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth')) async def webdl(_, m): parts = m.text.split() url = parts[0] @@ -72,6 +60,16 @@ async def webdl(_, m): if not is_valid_url(url): return await m.reply("The URL is invalid or inaccessible") + # Check if the user is sending authentication details + if ":" in m.text and m.text.count(":") == 1: + username, password = m.text.split(":") + add_credentials(m.chat.id, username, password) + return await m.reply("Credentials saved successfully.") + + # Check if user has credentials saved + credentials = get_credentials(m.chat.id) + auth = (credentials['username'], credentials['password']) if credentials else None + msg = await m.reply('Processing...') asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) @@ -80,7 +78,7 @@ async def webdl(_, m): if not os.path.isdir(dir): os.makedirs(dir) - obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024) + obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: return await msg.edit_text('Something went wrong!') From d216f9bf907b5bd2bc04519152cde1bed72d9e66 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:34:21 +0530 Subject: [PATCH 13/66] Create auth.py --- auth.py | 31 
+++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 auth.py diff --git a/auth.py b/auth.py new file mode 100644 index 0000000..a4ea522 --- /dev/null +++ b/auth.py @@ -0,0 +1,31 @@ +# auth.py + +import os +import json + +AUTH_FILE = 'auth.json' + +def load_auth_data(): + if os.path.exists(AUTH_FILE): + with open(AUTH_FILE, 'r') as file: + return json.load(file) + return {} + +def save_auth_data(data): + with open(AUTH_FILE, 'w') as file: + json.dump(data, file, indent=4) + +def add_credentials(user_id, username, password): + data = load_auth_data() + data[user_id] = {'username': username, 'password': password} + save_auth_data(data) + +def get_credentials(user_id): + data = load_auth_data() + return data.get(str(user_id)) + +def remove_credentials(user_id): + data = load_auth_data() + if str(user_id) in data: + del data[str(user_id)] + save_auth_data(data) From 025cf5dfacdecd39894c3b0a1a328835e93d7ec8 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:38:22 +0530 Subject: [PATCH 14/66] Update main.py --- main.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/main.py b/main.py index 1dfaa57..3355b71 100644 --- a/main.py +++ b/main.py @@ -90,4 +90,23 @@ async def webdl(_, m): shutil.rmtree(dir) os.remove(name+'.zip') +def is_valid_url(url): + try: + response = requests.head(url, timeout=5) + return response.status_code == 200 + except requests.RequestException: + return False + +def parse_components(text): + components = text.split()[1:] + imgFlg = 'img' in components + linkFlg = 'css' in components + scriptFlg = 'script' in components + return imgFlg, linkFlg, scriptFlg + +async def send_progress(msg, chat_id, initial_text): + for i in range(10): + await asyncio.sleep(1) + await Bot.edit_message_text(chat_id=chat_id, message_id=msg.message_id, text=f"{initial_text}\nProgress: {i*10}%") + Bot.run() From bbefcaf5ba05ca243dadf16eaa565522ee648835 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 06:43:13 +0530 Subject: [PATCH 15/66] Update main.py --- main.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index 3355b71..20bb2a5 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ from pyrogram import Client, filters from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton from web_dl import urlDownloader -from auth import add_credentials, get_credentials, remove_credentials +from auth import add_credentials, get_credentials import asyncio BOT_TOKEN = os.environ.get("BOT_TOKEN") @@ -26,7 +26,7 @@ Send any URL, optionally with the components you want to download. For example: 'https://www.google.com img,css,script' -Use /auth to add your authentication credentials. +Use /auth username:password to add your authentication credentials. 
""" START_BTN = InlineKeyboardMarkup( @@ -47,7 +47,12 @@ async def start(bot, update): @Bot.on_message(filters.command(["auth"])) async def auth(bot, update): - await update.reply_text("Please send your username and password in the format 'username:password'") + if len(update.command) != 2 or ':' not in update.command[1]: + return await update.reply_text("Please send your username and password in the format 'username:password'") + + username, password = update.command[1].split(":", 1) + add_credentials(update.from_user.id, username, password) + await update.reply_text("Credentials saved successfully.") @Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth')) async def webdl(_, m): @@ -60,12 +65,6 @@ async def webdl(_, m): if not is_valid_url(url): return await m.reply("The URL is invalid or inaccessible") - # Check if the user is sending authentication details - if ":" in m.text and m.text.count(":") == 1: - username, password = m.text.split(":") - add_credentials(m.chat.id, username, password) - return await m.reply("Credentials saved successfully.") - # Check if user has credentials saved credentials = get_credentials(m.chat.id) auth = (credentials['username'], credentials['password']) if credentials else None From 89e6de7018ecb973a3a6560c55fd0837a6bf26a4 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 07:14:07 +0530 Subject: [PATCH 16/66] Update main.py From 8dff9179084b0ecc086c8dc8714da1d430bd432c Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 07:16:39 +0530 Subject: [PATCH 17/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 20bb2a5..5cbfa00 100644 --- a/main.py +++ b/main.py @@ -106,6 +106,6 @@ def parse_components(text): async def send_progress(msg, chat_id, initial_text): for i in range(10): await asyncio.sleep(1) - await Bot.edit_message_text(chat_id=chat_id, message_id=msg.message_id, text=f"{initial_text}\nProgress: {i*10}%") + await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") Bot.run() From 8844de0ef708eed8a3021a7803d043e0ef7c51db Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 07:19:03 +0530 Subject: [PATCH 18/66] Update main.py --- main.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 5cbfa00..16e7d00 100644 --- a/main.py +++ b/main.py @@ -104,8 +104,11 @@ def parse_components(text): return imgFlg, linkFlg, scriptFlg async def send_progress(msg, chat_id, initial_text): - for i in range(10): - await asyncio.sleep(1) - await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") + try: + for i in range(10): + await asyncio.sleep(1) + await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") + except Exception as e: + print(f"Error updating progress: {e}") Bot.run() From 5c671bf6a5eda8443e8da4eb03a4b342a51ec3cb Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:28:54 +0530 Subject: [PATCH 19/66] Update main.py --- main.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 16e7d00..31bd7ef 100644 --- a/main.py +++ b/main.py @@ -70,7 +70,7 @@ 
async def webdl(_, m): auth = (credentials['username'], credentials['password']) if credentials else None msg = await m.reply('Processing...') - asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) + await send_progress(msg, m.chat.id, "Processing...") imgFlg, linkFlg, scriptFlg = parse_components(m.text) name = dir = str(m.chat.id) @@ -107,8 +107,12 @@ async def send_progress(msg, chat_id, initial_text): try: for i in range(10): await asyncio.sleep(1) - await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") + try: + await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") + except Exception as e: + print(f"Error updating progress: {e}", file=sys.stderr) + break except Exception as e: - print(f"Error updating progress: {e}") + print(f"Error in send_progress loop: {e}", file=sys.stderr) Bot.run() From 1e5201558c8183e18993535161b588709d26e2d0 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:34:10 +0530 Subject: [PATCH 20/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 31bd7ef..998526e 100644 --- a/main.py +++ b/main.py @@ -70,7 +70,7 @@ async def webdl(_, m): auth = (credentials['username'], credentials['password']) if credentials else None msg = await m.reply('Processing...') - await send_progress(msg, m.chat.id, "Processing...") + asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) imgFlg, linkFlg, scriptFlg = parse_components(m.text) name = dir = str(m.chat.id) From e7fbd278878da586669c3a92d19ba869e6a4901f Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:34:35 +0530 Subject: [PATCH 21/66] Update web_dl.py --- web_dl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/web_dl.py b/web_dl.py index 77c486b..e4c4da1 100644 --- a/web_dl.py +++ b/web_dl.py @@ -29,6 +29,7 @@ def savePage(self, url, pagefolder='page'): """Save the web page components based on the input URL and dir name.""" try: response = self.session.get(url, auth=self.auth) + response.raise_for_status() self.soup = BeautifulSoup(response.text, features="lxml") if not os.path.exists(pagefolder): os.mkdir(pagefolder) @@ -82,6 +83,7 @@ def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category= filepath = os.path.join(pagefolder, filename) res[inner] = os.path.join(os.path.basename(pagefolder), filename) if not os.path.isfile(filepath): + print(f"Downloading {fileurl} to {filepath}") # Debug statement futures.append(executor.submit(self._download_file, fileurl, filepath)) for future in futures: if future.result(): From b6d509b923965169430323df79e647ac331ad7c0 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:37:15 +0530 Subject: [PATCH 22/66] Update main.py --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index 998526e..e91cf91 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import os +import sys # Add this import statement import shutil import requests from pyrogram import Client, filters From 7afa77ffbc59c38e2d8ceb0bb362b30f143b8d64 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:38:20 +0530 Subject: [PATCH 23/66] Update web_dl.py --- web_dl.py | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/web_dl.py b/web_dl.py index e4c4da1..2212fc1 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,6 +1,6 @@ import os import re -import sys +import sys # Make sure to import sys here as well if needed import requests from urllib.parse import urljoin from bs4 import BeautifulSoup @@ -60,6 +60,7 @@ def _download_file(self, fileurl, filepath): for chunk in filebin.iter_content(chunk_size=8192): if chunk: file.write(chunk) + print(f"Successfully downloaded {fileurl} to {filepath}") # Debug statement return True except requests.RequestException as exc: print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) From ca9b36398437c17908a3d0adb1643aa1707a822f Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:41:13 +0530 Subject: [PATCH 24/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index e91cf91..d1280b7 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,5 @@ import os -import sys # Add this import statement +import sys import shutil import requests from pyrogram import Client, filters From 7ce16ebc3610304b5b61cb05857deb3570ff9690 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:42:14 +0530 Subject: [PATCH 25/66] Update web_dl.py --- web_dl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_dl.py b/web_dl.py index 2212fc1..08f872e 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,6 +1,6 @@ import os import re -import sys # Make sure to import sys here as well if needed +import sys import requests from urllib.parse import urljoin from bs4 import BeautifulSoup From df858ba91f52312c3c06a33e7dbb72c243da0720 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 08:58:46 +0530 Subject: [PATCH 26/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index d1280b7..ed6e493 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,5 @@ import os -import sys +import sys # Ensure sys is imported import shutil import requests from pyrogram import Client, filters From df90085f43beecae48d774398736919f9fe68ce9 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 09:02:55 +0530 Subject: [PATCH 27/66] Update web_dl.py --- web_dl.py | 68 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/web_dl.py b/web_dl.py index 08f872e..263a815 100644 --- a/web_dl.py +++ b/web_dl.py @@ -2,15 +2,17 @@ import re import sys import requests -from urllib.parse import urljoin +from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor +from typing import Tuple, Optional, Dict -class urlDownloader(object): +class urlDownloader: """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): - self.soup = None + + def __init__(self, imgFlg: bool = True, linkFlg: bool = True, scriptFlg: bool = True, file_size_limit: Optional[int] = None, max_retries: int = 3, auth: Optional[Tuple[str, str]] = None): + self.soup: Optional[BeautifulSoup] = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg @@ 
-19,43 +21,48 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=No self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.session = requests.Session() - self.summary = { + self.summary: Dict[str, int] = { 'images': 0, 'links': 0, 'scripts': 0 } - def savePage(self, url, pagefolder='page'): + def savePage(self, url: str, pagefolder: str = 'page') -> Tuple[bool, Optional[str]]: """Save the web page components based on the input URL and dir name.""" try: response = self.session.get(url, auth=self.auth) response.raise_for_status() - self.soup = BeautifulSoup(response.text, features="lxml") - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) + self.soup = BeautifulSoup(response.text, 'lxml') + os.makedirs(pagefolder, exist_ok=True) + if self.imgFlg: - self._soupfindnSave(url, pagefolder, tag2find='img', inner='src', category='images') + self._soupfindnSave(url, pagefolder, 'img', 'src', 'images') if self.linkFlg: - self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') + self._soupfindnSave(url, pagefolder, 'link', 'href', 'links') if self.scriptFlg: - self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') + self._soupfindnSave(url, pagefolder, 'script', 'src', 'scripts') + with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) + summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) return False, None - def _download_file(self, fileurl, filepath): + def _download_file(self, fileurl: str, filepath: str) -> bool: """Download a file with retry mechanism.""" for attempt in range(self.max_retries): try: filebin = self.session.get(fileurl, stream=True, auth=self.auth) filebin.raise_for_status() - if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: - print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) + + file_size = int(filebin.headers.get('content-length', 0)) + if self.file_size_limit and file_size > self.file_size_limit: + print(f"File {fileurl} exceeds the size limit of {self.file_size_limit} bytes.", file=sys.stderr) return False + with open(filepath, 'wb') as file: for chunk in filebin.iter_content(chunk_size=8192): if chunk: @@ -66,26 +73,37 @@ def _download_file(self, fileurl, filepath): print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) return False - def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): - """Saves on specified pagefolder all tag2find objects.""" - pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - elements = self.soup.findAll(tag2find) + def _soupfindnSave(self, url: str, pagefolder: str, tag2find: str, inner: str, category: str) -> None: + """Saves specified tag objects in the given folder.""" + folder_path = os.path.join(pagefolder, tag2find) + os.makedirs(folder_path, exist_ok=True) + + elements = self.soup.find_all(tag2find) with ThreadPoolExecutor(max_workers=4) as executor: futures = [] for res in tqdm(elements, desc=f"Downloading {tag2find}"): if not res.has_attr(inner): continue - filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - if tag2find == 'link' and (not any(ext in filename for ext 
in self.linkType)): + + filename = self._sanitize_filename(res.get(inner)) + if tag2find == 'link' and not any(ext in filename for ext in self.linkType): filename += '.html' + fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(pagefolder, filename) - res[inner] = os.path.join(os.path.basename(pagefolder), filename) + filepath = os.path.join(folder_path, filename) + + res[inner] = os.path.join(os.path.basename(folder_path), filename) + if not os.path.isfile(filepath): print(f"Downloading {fileurl} to {filepath}") # Debug statement futures.append(executor.submit(self._download_file, fileurl, filepath)) + for future in futures: if future.result(): self.summary[category] += 1 + + def _sanitize_filename(self, url: str) -> str: + """Sanitize the filename extracted from the URL.""" + parsed_url = urlparse(url) + filename = os.path.basename(parsed_url.path) + return re.sub(r'\W+', '.', filename) From b9bb481520d3d27d4d3dbd74b6743efeb89440b3 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 09:03:18 +0530 Subject: [PATCH 28/66] Update main.py From d8dc8a48de2a108f44af2eec5d194b374e89b47e Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 09:31:53 +0530 Subject: [PATCH 29/66] Update main.py --- main.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index ed6e493..8240d94 100644 --- a/main.py +++ b/main.py @@ -71,7 +71,7 @@ async def webdl(_, m): auth = (credentials['username'], credentials['password']) if credentials else None msg = await m.reply('Processing...') - asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) + progress_task = asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) imgFlg, linkFlg, scriptFlg = parse_components(m.text) name = dir = str(m.chat.id) @@ -81,7 +81,9 @@ async def webdl(_, m): obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: - return await msg.edit_text('Something went wrong!') + await msg.edit_text('Something went wrong!') + progress_task.cancel() + return shutil.make_archive(name, 'zip', base_dir=dir) await m.reply_document(name+'.zip', caption=summary) @@ -89,6 +91,7 @@ async def webdl(_, m): shutil.rmtree(dir) os.remove(name+'.zip') + progress_task.cancel() def is_valid_url(url): try: @@ -111,8 +114,11 @@ async def send_progress(msg, chat_id, initial_text): try: await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") except Exception as e: + if "MESSAGE_ID_INVALID" in str(e): + print(f"Message ID invalid: {e}", file=sys.stderr) + break print(f"Error updating progress: {e}", file=sys.stderr) - break + continue except Exception as e: print(f"Error in send_progress loop: {e}", file=sys.stderr) From 8bf8a4018ea4262acc8a9cb05e14facaf7ea66e0 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 09:37:52 +0530 Subject: [PATCH 30/66] Update main.py --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index 8240d94..cd8ea55 100644 --- a/main.py +++ b/main.py @@ -74,6 +74,7 @@ async def webdl(_, m): progress_task = asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) imgFlg, linkFlg, scriptFlg = parse_components(m.text) + print(f"Flags - img: {imgFlg}, link: {linkFlg}, 
script: {scriptFlg}") # Debug statement name = dir = str(m.chat.id) if not os.path.isdir(dir): os.makedirs(dir) From 8eeaa057c1c1b223f73d9a79f9951dbd22ee03e7 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 09:38:45 +0530 Subject: [PATCH 31/66] Update web_dl.py --- web_dl.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/web_dl.py b/web_dl.py index 263a815..67423a5 100644 --- a/web_dl.py +++ b/web_dl.py @@ -35,17 +35,23 @@ def savePage(self, url: str, pagefolder: str = 'page') -> Tuple[bool, Optional[s self.soup = BeautifulSoup(response.text, 'lxml') os.makedirs(pagefolder, exist_ok=True) + print(f"Starting to download components from {url}") + if self.imgFlg: + print("Downloading images...") self._soupfindnSave(url, pagefolder, 'img', 'src', 'images') if self.linkFlg: + print("Downloading links...") self._soupfindnSave(url, pagefolder, 'link', 'href', 'links') if self.scriptFlg: + print("Downloading scripts...") self._soupfindnSave(url, pagefolder, 'script', 'src', 'scripts') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." + print(summary) return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) From 2b8ec6721680fc4d2ec55f170d13f1f6a16d136c Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:03:28 +0530 Subject: [PATCH 32/66] Update main.py --- main.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index cd8ea55..2fa2eb7 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,5 @@ import os -import sys # Ensure sys is imported +import sys import shutil import requests from pyrogram import Client, filters @@ -94,13 +94,6 @@ async def webdl(_, m): os.remove(name+'.zip') progress_task.cancel() -def is_valid_url(url): - try: - response = requests.head(url, timeout=5) - return response.status_code == 200 - except requests.RequestException: - return False - def parse_components(text): components = text.split()[1:] imgFlg = 'img' in components @@ -108,6 +101,13 @@ def parse_components(text): scriptFlg = 'script' in components return imgFlg, linkFlg, scriptFlg +def is_valid_url(url): + try: + response = requests.head(url, timeout=5) + return response.status_code == 200 + except requests.RequestException: + return False + async def send_progress(msg, chat_id, initial_text): try: for i in range(10): From 87ac1013a592395928d9b1d1754e82c0619d9950 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:07:38 +0530 Subject: [PATCH 33/66] Update web_dl.py --- web_dl.py | 77 ++++++++++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 49 deletions(-) diff --git a/web_dl.py b/web_dl.py index 67423a5..77578fd 100644 --- a/web_dl.py +++ b/web_dl.py @@ -2,17 +2,15 @@ import re import sys import requests -from urllib.parse import urljoin, urlparse +from urllib.parse import urljoin from bs4 import BeautifulSoup from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor -from typing import Tuple, Optional, Dict -class urlDownloader: +class urlDownloader(object): """Download the webpage components based on the input URL.""" - - def __init__(self, imgFlg: bool = True, 
linkFlg: bool = True, scriptFlg: bool = True, file_size_limit: Optional[int] = None, max_retries: int = 3, auth: Optional[Tuple[str, str]] = None): - self.soup: Optional[BeautifulSoup] = None + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): + self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg @@ -21,54 +19,43 @@ def __init__(self, imgFlg: bool = True, linkFlg: bool = True, scriptFlg: bool = self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.session = requests.Session() - self.summary: Dict[str, int] = { + self.summary = { 'images': 0, 'links': 0, 'scripts': 0 } - def savePage(self, url: str, pagefolder: str = 'page') -> Tuple[bool, Optional[str]]: + def savePage(self, url, pagefolder='page'): """Save the web page components based on the input URL and dir name.""" try: response = self.session.get(url, auth=self.auth) response.raise_for_status() - self.soup = BeautifulSoup(response.text, 'lxml') - os.makedirs(pagefolder, exist_ok=True) - - print(f"Starting to download components from {url}") - + self.soup = BeautifulSoup(response.text, features="lxml") + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) if self.imgFlg: - print("Downloading images...") - self._soupfindnSave(url, pagefolder, 'img', 'src', 'images') + self._soupfindnSave(url, pagefolder, tag2find='img', inner='src', category='images') if self.linkFlg: - print("Downloading links...") - self._soupfindnSave(url, pagefolder, 'link', 'href', 'links') + self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') if self.scriptFlg: - print("Downloading scripts...") - self._soupfindnSave(url, pagefolder, 'script', 'src', 'scripts') - + self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." 
- print(summary) return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) return False, None - def _download_file(self, fileurl: str, filepath: str) -> bool: + def _download_file(self, fileurl, filepath): """Download a file with retry mechanism.""" for attempt in range(self.max_retries): try: filebin = self.session.get(fileurl, stream=True, auth=self.auth) filebin.raise_for_status() - - file_size = int(filebin.headers.get('content-length', 0)) - if self.file_size_limit and file_size > self.file_size_limit: - print(f"File {fileurl} exceeds the size limit of {self.file_size_limit} bytes.", file=sys.stderr) + if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: + print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) return False - with open(filepath, 'wb') as file: for chunk in filebin.iter_content(chunk_size=8192): if chunk: @@ -79,37 +66,29 @@ def _download_file(self, fileurl: str, filepath: str) -> bool: print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) return False - def _soupfindnSave(self, url: str, pagefolder: str, tag2find: str, inner: str, category: str) -> None: - """Saves specified tag objects in the given folder.""" - folder_path = os.path.join(pagefolder, tag2find) - os.makedirs(folder_path, exist_ok=True) - - elements = self.soup.find_all(tag2find) + def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): + """Saves on specified pagefolder all tag2find objects.""" + pagefolder = os.path.join(pagefolder, tag2find) + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + elements = self.soup.findAll(tag2find) + if not elements: + print(f"No {tag2find} elements found.", file=sys.stderr) with ThreadPoolExecutor(max_workers=4) as executor: futures = [] for res in tqdm(elements, desc=f"Downloading {tag2find}"): if not res.has_attr(inner): continue - - filename = self._sanitize_filename(res.get(inner)) - if tag2find == 'link' and not any(ext in filename for ext in self.linkType): + filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) + if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): filename += '.html' - fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(folder_path, filename) - - res[inner] = os.path.join(os.path.basename(folder_path), filename) - + filepath = os.path.join(pagefolder, filename) + res[inner] = os.path.join(os.path.basename(pagefolder), filename) if not os.path.isfile(filepath): print(f"Downloading {fileurl} to {filepath}") # Debug statement futures.append(executor.submit(self._download_file, fileurl, filepath)) - for future in futures: if future.result(): self.summary[category] += 1 - - def _sanitize_filename(self, url: str) -> str: - """Sanitize the filename extracted from the URL.""" - parsed_url = urlparse(url) - filename = os.path.basename(parsed_url.path) - return re.sub(r'\W+', '.', filename) + print(f"Completed downloading {tag2find} elements. 
Total: {self.summary[category]}") # Debug statement From a4aa730fe75ce426662ec4ef131385ee107b7907 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:14:01 +0530 Subject: [PATCH 34/66] Update main.py --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 2fa2eb7..b19b3fa 100644 --- a/main.py +++ b/main.py @@ -95,10 +95,11 @@ async def webdl(_, m): progress_task.cancel() def parse_components(text): - components = text.split()[1:] + components = text.split()[1:] # Skip the URL part imgFlg = 'img' in components linkFlg = 'css' in components scriptFlg = 'script' in components + print(f"Parsed components: imgFlg={imgFlg}, linkFlg={linkFlg}, scriptFlg={scriptFlg}") # Debug statement return imgFlg, linkFlg, scriptFlg def is_valid_url(url): From 0e3df0cf0a819ca21fc442dd4464955284734718 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:17:37 +0530 Subject: [PATCH 35/66] Update main.py From a5d3f2f3a067d906ede819ed6c0a59c5cd73e1ea Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:22:31 +0530 Subject: [PATCH 36/66] Update main.py --- main.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/main.py b/main.py index b19b3fa..b30ab23 100644 --- a/main.py +++ b/main.py @@ -59,6 +59,8 @@ async def auth(bot, update): async def webdl(_, m): parts = m.text.split() url = parts[0] + components = parts[1:] # Extract components from the message + download_directly = False if not url.startswith('http'): return await m.reply("The URL must start with 'http' or 'https'") @@ -66,15 +68,29 @@ async def webdl(_, m): if not is_valid_url(url): return await m.reply("The URL is invalid or inaccessible") - # Check if user has credentials saved - credentials = get_credentials(m.chat.id) - auth = (credentials['username'], credentials['password']) if credentials else None - - msg = await m.reply('Processing...') - progress_task = asyncio.create_task(send_progress(msg, m.chat.id, "Processing...")) + # Check if components are specified in the message + if components: + imgFlg, linkFlg, scriptFlg = parse_components(' '.join(components)) + print(f"Flags - img: {imgFlg}, link: {linkFlg}, script: {scriptFlg}") # Debug statement + else: + # No components specified, prompt user with options + keyboard = InlineKeyboardMarkup( + [ + [ + InlineKeyboardButton("HTML", callback_data="html"), + InlineKeyboardButton("CSS", callback_data="css"), + InlineKeyboardButton("Images", callback_data="images") + ], + [ + InlineKeyboardButton("XML", callback_data="xml"), + InlineKeyboardButton("Video", callback_data="video"), + InlineKeyboardButton("JavaScript", callback_data="js") + ] + ] + ) + await m.reply("Please select which components to download:", reply_markup=keyboard) + return - imgFlg, linkFlg, scriptFlg = parse_components(m.text) - print(f"Flags - img: {imgFlg}, link: {linkFlg}, script: {scriptFlg}") # Debug statement name = dir = str(m.chat.id) if not os.path.isdir(dir): os.makedirs(dir) @@ -82,24 +98,21 @@ async def webdl(_, m): obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: - await msg.edit_text('Something went wrong!') - progress_task.cancel() - return + return await m.reply('Something went 
wrong!') shutil.make_archive(name, 'zip', base_dir=dir) await m.reply_document(name+'.zip', caption=summary) - await msg.delete() shutil.rmtree(dir) os.remove(name+'.zip') - progress_task.cancel() + + print("Download completed successfully!") # Debug statement def parse_components(text): - components = text.split()[1:] # Skip the URL part + components = text.split() imgFlg = 'img' in components linkFlg = 'css' in components scriptFlg = 'script' in components - print(f"Parsed components: imgFlg={imgFlg}, linkFlg={linkFlg}, scriptFlg={scriptFlg}") # Debug statement return imgFlg, linkFlg, scriptFlg def is_valid_url(url): From dc1de97c9a6fa69ce01af1778c717576f1af4ea4 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:55:21 +0530 Subject: [PATCH 37/66] Update main.py --- main.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/main.py b/main.py index b30ab23..d54bd23 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,100 @@ +# web_dl.py +import os +import re +import sys +import requests +from urllib.parse import urljoin +from bs4 import BeautifulSoup +from tqdm import tqdm +from concurrent.futures import ThreadPoolExecutor + +class urlDownloader(object): + """Download the webpage components based on the input URL.""" + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): + self.soup = None + self.imgFlg = imgFlg + self.linkFlg = linkFlg + self.scriptFlg = scriptFlg + self.file_size_limit = file_size_limit + self.max_retries = max_retries + self.auth = auth + self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') + self.session = requests.Session() + self.summary = { + 'images': 0, + 'links': 0, + 'scripts': 0 + } + + def savePage(self, url, pagefolder='page'): + """Save the web page components based on the input URL and dir name.""" + try: + response = self.session.get(url, auth=self.auth) + response.raise_for_status() + self.soup = BeautifulSoup(response.text, features="lxml") + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + if self.imgFlg: + self._soupfindnSave(url, pagefolder, tag2find='img', inner='src', category='images') + if self.linkFlg: + self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') + if self.scriptFlg: + self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') + with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: + file.write(self.soup.prettify('utf-8')) + summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." 
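
An aside on the retry loop in _download_file(): it retries back-to-back with no delay. A common refinement, sketched here as an assumption rather than something this patch series adopts, is exponential backoff between attempts:

    import time
    import requests

    def get_with_backoff(session, url, max_retries=3, base_delay=1.0, **kwargs):
        """Fetch url, sleeping 1s, 2s, 4s, ... between failed attempts."""
        for attempt in range(max_retries):
            try:
                resp = session.get(url, timeout=10, **kwargs)
                resp.raise_for_status()
                return resp
            except requests.RequestException:
                if attempt == max_retries - 1:
                    raise  # out of retries; let the caller decide
                time.sleep(base_delay * 2 ** attempt)
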
+ return True, summary + except Exception as e: + print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) + return False, None + + def _download_file(self, fileurl, filepath): + """Download a file with retry mechanism.""" + for attempt in range(self.max_retries): + try: + filebin = self.session.get(fileurl, stream=True, auth=self.auth) + filebin.raise_for_status() + if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: + print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) + return False + with open(filepath, 'wb') as file: + for chunk in filebin.iter_content(chunk_size=8192): + if chunk: + file.write(chunk) + print(f"Successfully downloaded {fileurl} to {filepath}") # Debug statement + return True + except requests.RequestException as exc: + print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) + return False + + def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): + """Saves on specified pagefolder all tag2find objects.""" + pagefolder = os.path.join(pagefolder, tag2find) + if not os.path.exists(pagefolder): + os.mkdir(pagefolder) + elements = self.soup.findAll(tag2find) + if not elements: + print(f"No {tag2find} elements found.", file=sys.stderr) + with ThreadPoolExecutor(max_workers=4) as executor: + futures = [] + for res in tqdm(elements, desc=f"Downloading {tag2find}"): + if not res.has_attr(inner): + continue + filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) + if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): + filename += '.html' + fileurl = urljoin(url, res.get(inner)) + filepath = os.path.join(pagefolder, filename) + res[inner] = os.path.join(os.path.basename(pagefolder), filename) + if not os.path.isfile(filepath): + print(f"Downloading {fileurl} to {filepath}") # Debug statement + futures.append(executor.submit(self._download_file, fileurl, filepath)) + for future in futures: + if future.result(): + self.summary[category] += 1 + print(f"Completed downloading {tag2find} elements. 
Total: {self.summary[category]}") # Debug statement + +# main.py import os import sys import shutil @@ -8,6 +105,7 @@ from auth import add_credentials, get_credentials import asyncio +# Bot configuration using environment variables BOT_TOKEN = os.environ.get("BOT_TOKEN") API_ID = os.environ.get("API_ID") API_HASH = os.environ.get("API_HASH") From 630622a3026669efe70d338ee6930cb1b35deb6e Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:57:55 +0530 Subject: [PATCH 38/66] Update main.py --- main.py | 161 ++++++++++++-------------------------------------------- 1 file changed, 35 insertions(+), 126 deletions(-) diff --git a/main.py b/main.py index d54bd23..2d4a0ac 100644 --- a/main.py +++ b/main.py @@ -1,106 +1,10 @@ -# web_dl.py -import os -import re -import sys -import requests -from urllib.parse import urljoin -from bs4 import BeautifulSoup -from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor - -class urlDownloader(object): - """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): - self.soup = None - self.imgFlg = imgFlg - self.linkFlg = linkFlg - self.scriptFlg = scriptFlg - self.file_size_limit = file_size_limit - self.max_retries = max_retries - self.auth = auth - self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') - self.session = requests.Session() - self.summary = { - 'images': 0, - 'links': 0, - 'scripts': 0 - } - - def savePage(self, url, pagefolder='page'): - """Save the web page components based on the input URL and dir name.""" - try: - response = self.session.get(url, auth=self.auth) - response.raise_for_status() - self.soup = BeautifulSoup(response.text, features="lxml") - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - if self.imgFlg: - self._soupfindnSave(url, pagefolder, tag2find='img', inner='src', category='images') - if self.linkFlg: - self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') - if self.scriptFlg: - self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') - with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: - file.write(self.soup.prettify('utf-8')) - summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." 
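
A caveat on the file_size_limit check that these patches move around: it trusts the Content-Length header, which servers may omit or understate. A defensive variant, shown only as a sketch, enforces the limit on the bytes actually received:

    def save_stream_limited(response, filepath, limit):
        """Stream response to filepath; stop and report failure past `limit` bytes."""
        written = 0
        with open(filepath, 'wb') as fh:
            for chunk in response.iter_content(chunk_size=8192):
                written += len(chunk)
                if limit and written > limit:
                    return False  # caller should remove the partial file
                fh.write(chunk)
        return True
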
- return True, summary - except Exception as e: - print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) - return False, None - - def _download_file(self, fileurl, filepath): - """Download a file with retry mechanism.""" - for attempt in range(self.max_retries): - try: - filebin = self.session.get(fileurl, stream=True, auth=self.auth) - filebin.raise_for_status() - if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: - print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) - return False - with open(filepath, 'wb') as file: - for chunk in filebin.iter_content(chunk_size=8192): - if chunk: - file.write(chunk) - print(f"Successfully downloaded {fileurl} to {filepath}") # Debug statement - return True - except requests.RequestException as exc: - print(f"Attempt {attempt + 1} failed for {fileurl}: {exc}", file=sys.stderr) - return False - - def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): - """Saves on specified pagefolder all tag2find objects.""" - pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - elements = self.soup.findAll(tag2find) - if not elements: - print(f"No {tag2find} elements found.", file=sys.stderr) - with ThreadPoolExecutor(max_workers=4) as executor: - futures = [] - for res in tqdm(elements, desc=f"Downloading {tag2find}"): - if not res.has_attr(inner): - continue - filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): - filename += '.html' - fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(pagefolder, filename) - res[inner] = os.path.join(os.path.basename(pagefolder), filename) - if not os.path.isfile(filepath): - print(f"Downloading {fileurl} to {filepath}") # Debug statement - futures.append(executor.submit(self._download_file, fileurl, filepath)) - for future in futures: - if future.result(): - self.summary[category] += 1 - print(f"Completed downloading {tag2find} elements. 
Total: {self.summary[category]}") # Debug statement - # main.py import os import sys import shutil import requests from pyrogram import Client, filters -from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton +from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery from web_dl import urlDownloader from auth import add_credentials, get_credentials import asyncio @@ -155,10 +59,7 @@ async def auth(bot, update): @Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth')) async def webdl(_, m): - parts = m.text.split() - url = parts[0] - components = parts[1:] # Extract components from the message - download_directly = False + url = m.text.strip() if not url.startswith('http'): return await m.reply("The URL must start with 'http' or 'https'") @@ -166,40 +67,46 @@ async def webdl(_, m): if not is_valid_url(url): return await m.reply("The URL is invalid or inaccessible") - # Check if components are specified in the message - if components: - imgFlg, linkFlg, scriptFlg = parse_components(' '.join(components)) - print(f"Flags - img: {imgFlg}, link: {linkFlg}, script: {scriptFlg}") # Debug statement - else: - # No components specified, prompt user with options - keyboard = InlineKeyboardMarkup( + # Show buttons for selecting components to download + keyboard = InlineKeyboardMarkup( + [ + [ + InlineKeyboardButton("HTML", callback_data=f"html|{url}"), + InlineKeyboardButton("CSS", callback_data=f"css|{url}"), + InlineKeyboardButton("Images", callback_data=f"img|{url}") + ], [ - [ - InlineKeyboardButton("HTML", callback_data="html"), - InlineKeyboardButton("CSS", callback_data="css"), - InlineKeyboardButton("Images", callback_data="images") - ], - [ - InlineKeyboardButton("XML", callback_data="xml"), - InlineKeyboardButton("Video", callback_data="video"), - InlineKeyboardButton("JavaScript", callback_data="js") - ] + InlineKeyboardButton("XML", callback_data=f"xml|{url}"), + InlineKeyboardButton("Video", callback_data=f"video|{url}"), + InlineKeyboardButton("JavaScript", callback_data=f"script|{url}") ] - ) - await m.reply("Please select which components to download:", reply_markup=keyboard) - return + ] + ) + await m.reply("Please select which components to download:", reply_markup=keyboard) + +@Bot.on_callback_query() +async def callback_query_handler(bot, update: CallbackQuery): + data = update.data.split("|") + component, url = data[0], data[1] + + imgFlg = component == 'img' + linkFlg = component == 'css' + scriptFlg = component == 'script' + videoFlg = component == 'video' + xmlFlg = component == 'xml' - name = dir = str(m.chat.id) + name = dir = str(update.message.chat.id) if not os.path.isdir(dir): os.makedirs(dir) - obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, file_size_limit=10*1024*1024, auth=auth) + auth = get_credentials(update.from_user.id) + obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: - return await m.reply('Something went wrong!') + return await update.message.reply('Something went wrong!') shutil.make_archive(name, 'zip', base_dir=dir) - await m.reply_document(name+'.zip', caption=summary) + await update.message.reply_document(name+'.zip', caption=summary) shutil.rmtree(dir) os.remove(name+'.zip') @@ -211,7 +118,9 @@ def parse_components(text): imgFlg = 'img' in components linkFlg = 'css' in components scriptFlg = 'script' in components - 
return imgFlg, linkFlg, scriptFlg + videoFlg = 'video' in components + xmlFlg = 'xml' in components + return imgFlg, linkFlg, scriptFlg, videoFlg, xmlFlg def is_valid_url(url): try: From 69337d1ddedf2c3bb605b4707502519ea4a23db0 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 10:58:25 +0530 Subject: [PATCH 39/66] Update web_dl.py --- web_dl.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/web_dl.py b/web_dl.py index 77578fd..019709f 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,3 +1,4 @@ +# web_dl.py import os import re import sys @@ -9,20 +10,25 @@ class urlDownloader(object): """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, file_size_limit=None, max_retries=3, auth=None): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg + self.videoFlg = videoFlg + self.xmlFlg = xmlFlg self.file_size_limit = file_size_limit self.max_retries = max_retries self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') + self.videoType = ('mp4', 'webm', 'ogg') self.session = requests.Session() self.summary = { 'images': 0, 'links': 0, - 'scripts': 0 + 'scripts': 0, + 'videos': 0, + 'xmls': 0 } def savePage(self, url, pagefolder='page'): @@ -39,9 +45,13 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='link', inner='href', category='links') if self.scriptFlg: self._soupfindnSave(url, pagefolder, tag2find='script', inner='src', category='scripts') + if self.videoFlg: + self._soupfindnSave(url, pagefolder, tag2find='video', inner='src', category='videos') + if self.xmlFlg: + self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts." + summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls." return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) From efe5cd85f868db12c21b2f5304f38a586610461c Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:02:35 +0530 Subject: [PATCH 40/66] Update web_dl.py --- web_dl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web_dl.py b/web_dl.py index 019709f..e1cfefa 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,4 +1,3 @@ -# web_dl.py import os import re import sys @@ -51,7 +50,8 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls." 
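
The one-line summary literal rewritten here keeps growing as categories are added. Deriving the text from the summary dict keeps it in sync automatically; a later patch in this series adds a generate_summary() helper in the same spirit, so this sketch merely anticipates it:

    def format_summary(summary):
        parts = [f"{count} {name}" for name, count in summary.items()]
        return "Downloaded: " + ", ".join(parts) + "."

    # format_summary({'images': 2, 'links': 1, 'scripts': 0})
    # -> 'Downloaded: 2 images, 1 links, 0 scripts.'
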
+ summary = (f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, " + f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) From 3ad64808e4bb786f565bc3f3f9fdea6f989d5518 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:04:17 +0530 Subject: [PATCH 41/66] Update main.py --- main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 2d4a0ac..953db6a 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,3 @@ -# main.py import os import sys import shutil @@ -86,8 +85,8 @@ async def webdl(_, m): @Bot.on_callback_query() async def callback_query_handler(bot, update: CallbackQuery): - data = update.data.split("|") - component, url = data[0], data[1] + data = update.data + component, url = data.split('|', 1) imgFlg = component == 'img' linkFlg = component == 'css' From 67c0ea311073f2f08c98722ff9c35cab97a62eec Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:07:35 +0530 Subject: [PATCH 42/66] Update main.py --- main.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index 953db6a..ec94e87 100644 --- a/main.py +++ b/main.py @@ -70,14 +70,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"html|{url}"), - InlineKeyboardButton("CSS", callback_data=f"css|{url}"), - InlineKeyboardButton("Images", callback_data=f"img|{url}") + InlineKeyboardButton("HTML", callback_data=f"h|{url}"), + InlineKeyboardButton("CSS", callback_data=f"c|{url}"), + InlineKeyboardButton("Images", callback_data=f"i|{url}") ], [ - InlineKeyboardButton("XML", callback_data=f"xml|{url}"), - InlineKeyboardButton("Video", callback_data=f"video|{url}"), - InlineKeyboardButton("JavaScript", callback_data=f"script|{url}") + InlineKeyboardButton("XML", callback_data=f"x|{url}"), + InlineKeyboardButton("Video", callback_data=f"v|{url}"), + InlineKeyboardButton("JavaScript", callback_data=f"j|{url}") ] ] ) @@ -88,11 +88,11 @@ async def callback_query_handler(bot, update: CallbackQuery): data = update.data component, url = data.split('|', 1) - imgFlg = component == 'img' - linkFlg = component == 'css' - scriptFlg = component == 'script' - videoFlg = component == 'video' - xmlFlg = component == 'xml' + imgFlg = component == 'i' + linkFlg = component == 'c' + scriptFlg = component == 'j' + videoFlg = component == 'v' + xmlFlg = component == 'x' name = dir = str(update.message.chat.id) if not os.path.isdir(dir): From 32a0f9be5fea48d88ebabdb1a1c35daf88a85172 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:10:35 +0530 Subject: [PATCH 43/66] Update main.py --- main.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index ec94e87..6b2553a 100644 --- a/main.py +++ b/main.py @@ -70,14 +70,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"h|{url}"), - InlineKeyboardButton("CSS", callback_data=f"c|{url}"), - InlineKeyboardButton("Images", callback_data=f"i|{url}") + InlineKeyboardButton("HTML", callback_data=f"h|{url[:50]}"), + InlineKeyboardButton("CSS", 
callback_data=f"c|{url[:50]}"), + InlineKeyboardButton("Images", callback_data=f"i|{url[:50]}") ], [ - InlineKeyboardButton("XML", callback_data=f"x|{url}"), - InlineKeyboardButton("Video", callback_data=f"v|{url}"), - InlineKeyboardButton("JavaScript", callback_data=f"j|{url}") + InlineKeyboardButton("XML", callback_data=f"x|{url[:50]}"), + InlineKeyboardButton("Video", callback_data=f"v|{url[:50]}"), + InlineKeyboardButton("JavaScript", callback_data=f"j|{url[:50]}") ] ] ) @@ -86,7 +86,10 @@ async def webdl(_, m): @Bot.on_callback_query() async def callback_query_handler(bot, update: CallbackQuery): data = update.data - component, url = data.split('|', 1) + component, url_fragment = data.split('|', 1) + + # Reconstruct the URL + url = update.message.reply_to_message.text.split()[0] imgFlg = component == 'i' linkFlg = component == 'c' From 7fdf7eb35afb5511cd7686af76c7b54b9959ea88 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:13:28 +0530 Subject: [PATCH 44/66] Update main.py --- main.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/main.py b/main.py index 6b2553a..ec94e87 100644 --- a/main.py +++ b/main.py @@ -70,14 +70,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"h|{url[:50]}"), - InlineKeyboardButton("CSS", callback_data=f"c|{url[:50]}"), - InlineKeyboardButton("Images", callback_data=f"i|{url[:50]}") + InlineKeyboardButton("HTML", callback_data=f"h|{url}"), + InlineKeyboardButton("CSS", callback_data=f"c|{url}"), + InlineKeyboardButton("Images", callback_data=f"i|{url}") ], [ - InlineKeyboardButton("XML", callback_data=f"x|{url[:50]}"), - InlineKeyboardButton("Video", callback_data=f"v|{url[:50]}"), - InlineKeyboardButton("JavaScript", callback_data=f"j|{url[:50]}") + InlineKeyboardButton("XML", callback_data=f"x|{url}"), + InlineKeyboardButton("Video", callback_data=f"v|{url}"), + InlineKeyboardButton("JavaScript", callback_data=f"j|{url}") ] ] ) @@ -86,10 +86,7 @@ async def webdl(_, m): @Bot.on_callback_query() async def callback_query_handler(bot, update: CallbackQuery): data = update.data - component, url_fragment = data.split('|', 1) - - # Reconstruct the URL - url = update.message.reply_to_message.text.split()[0] + component, url = data.split('|', 1) imgFlg = component == 'i' linkFlg = component == 'c' From 0ecca274a20e0f3998ddee33179cf118456b2109 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:26:30 +0530 Subject: [PATCH 45/66] Update web_dl.py --- web_dl.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/web_dl.py b/web_dl.py index e1cfefa..0e53aa7 100644 --- a/web_dl.py +++ b/web_dl.py @@ -9,7 +9,7 @@ class urlDownloader(object): """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=False, xmlFlg=False, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg @@ -20,7 +20,6 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xml self.max_retries = max_retries self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') - self.videoType = ('mp4', 'webm', 
'ogg') self.session = requests.Session() self.summary = { 'images': 0, @@ -50,8 +49,7 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - summary = (f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, " - f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") + summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls." return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) From 3bb448747f5d96bee81eaab5fa0feee38b7e1358 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:28:29 +0530 Subject: [PATCH 46/66] Update main.py From 26ed465b55ee4d1d2acdd8328be1403b5f6222c2 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:33:21 +0530 Subject: [PATCH 47/66] Update web_dl.py --- web_dl.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web_dl.py b/web_dl.py index 0e53aa7..731d76a 100644 --- a/web_dl.py +++ b/web_dl.py @@ -9,7 +9,7 @@ class urlDownloader(object): """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=False, xmlFlg=False, file_size_limit=None, max_retries=3, auth=None): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg @@ -20,6 +20,7 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=False, xm self.max_retries = max_retries self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') + self.videoType = ('mp4', 'webm', 'ogg') self.session = requests.Session() self.summary = { 'images': 0, @@ -49,8 +50,8 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: file.write(self.soup.prettify('utf-8')) - summary = f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, {self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls." 
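
Background on the h|{url} and url[:50] experiments in the surrounding patches: Telegram caps callback_data at 64 bytes, so a full URL often does not fit, and truncating it corrupts the link. A common workaround, sketched as an assumption rather than what this bot settles on, is to hand the client a short token and keep the URL server-side:

    import uuid

    PENDING = {}  # token -> url; in-memory, so lost on restart

    def make_callback(component, url):
        token = uuid.uuid4().hex[:8]
        PENDING[token] = url
        return f"{component}|{token}"  # e.g. "i|9f3a1c2b", well under 64 bytes

    def resolve_callback(data):
        component, token = data.split('|', 1)
        return component, PENDING.pop(token, None)  # None if the token is stale
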
- return True, summary + summary = (f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, " + f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) return False, None From 6d5e14f529f0fc254eb6b2dff2b826dcddaa444f Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:37:59 +0530 Subject: [PATCH 48/66] Update web_dl.py --- web_dl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/web_dl.py b/web_dl.py index 731d76a..e1cfefa 100644 --- a/web_dl.py +++ b/web_dl.py @@ -52,6 +52,7 @@ def savePage(self, url, pagefolder='page'): file.write(self.soup.prettify('utf-8')) summary = (f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, " f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") + return True, summary except Exception as e: print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) return False, None From 12e655cb589ec43a862f50017425cafec78f0356 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:42:42 +0530 Subject: [PATCH 49/66] Update main.py --- main.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index ec94e87..8038486 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,7 @@ from pyrogram import Client, filters from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery from web_dl import urlDownloader -from auth import add_credentials, get_credentials +from auth import add_credentials, get_credentials, remove_credentials, get_all_credentials import asyncio # Bot configuration using environment variables @@ -29,6 +29,8 @@ 'https://www.google.com img,css,script' Use /auth username:password to add your authentication credentials. +Use /remove_auth to remove your authentication credentials. +Use /view_auth to view your stored authentication credentials. 
""" START_BTN = InlineKeyboardMarkup( @@ -56,7 +58,23 @@ async def auth(bot, update): add_credentials(update.from_user.id, username, password) await update.reply_text("Credentials saved successfully.") -@Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth')) +@Bot.on_message(filters.command(["remove_auth"])) +async def remove_auth(bot, update): + success = remove_credentials(update.from_user.id) + if success: + await update.reply_text("Credentials removed successfully.") + else: + await update.reply_text("No credentials found to remove.") + +@Bot.on_message(filters.command(["view_auth"])) +async def view_auth(bot, update): + creds = get_credentials(update.from_user.id) + if creds: + await update.reply_text(f"Your credentials:\nUsername: {creds['username']}\nPassword: {creds['password']}") + else: + await update.reply_text("No credentials found.") + +@Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth|/remove_auth|/view_auth')) async def webdl(_, m): url = m.text.strip() @@ -77,7 +95,7 @@ async def webdl(_, m): [ InlineKeyboardButton("XML", callback_data=f"x|{url}"), InlineKeyboardButton("Video", callback_data=f"v|{url}"), - InlineKeyboardButton("JavaScript", callback_data=f"j|{url}") + InlineKeyboardButton("JS", callback_data=f"j|{url}") ] ] ) From 63476d3caf7ea28c6137c4f35ba5e4346d59bac1 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:44:24 +0530 Subject: [PATCH 50/66] Update main.py --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 8038486..4d04155 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,7 @@ from pyrogram import Client, filters from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery from web_dl import urlDownloader -from auth import add_credentials, get_credentials, remove_credentials, get_all_credentials +from auth import add_credentials, get_credentials, remove_credentials import asyncio # Bot configuration using environment variables From 2b14828789581f3dbecb5ef1e4b3d5259dfadd5c Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:45:26 +0530 Subject: [PATCH 51/66] Update web_dl.py --- web_dl.py | 88 +++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/web_dl.py b/web_dl.py index e1cfefa..b770a8b 100644 --- a/web_dl.py +++ b/web_dl.py @@ -54,51 +54,49 @@ def savePage(self, url, pagefolder='page'): f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") return True, summary except Exception as e: - print(f"> savePage(): Create files failed: {str(e)}.", file=sys.stderr) - return False, None - - def _download_file(self, fileurl, filepath): - """Download a file with retry mechanism.""" - for attempt in range(self.max_retries): - try: - filebin = self.session.get(fileurl, stream=True, auth=self.auth) - filebin.raise_for_status() - if self.file_size_limit and int(filebin.headers.get('content-length', 0)) > self.file_size_limit: - print(f"File {fileurl} exceeds the size limit.", file=sys.stderr) - return False - with open(filepath, 'wb') as file: - for chunk in filebin.iter_content(chunk_size=8192): - if chunk: - file.write(chunk) - print(f"Successfully downloaded {fileurl} to {filepath}") # Debug statement - return True - except requests.RequestException as exc: - print(f"Attempt {attempt + 1} 
failed for {fileurl}: {exc}", file=sys.stderr) - return False + print(f"> savePage(): Create page error: {str(e)}") + return False, str(e) def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): - """Saves on specified pagefolder all tag2find objects.""" - pagefolder = os.path.join(pagefolder, tag2find) - if not os.path.exists(pagefolder): - os.mkdir(pagefolder) - elements = self.soup.findAll(tag2find) - if not elements: - print(f"No {tag2find} elements found.", file=sys.stderr) - with ThreadPoolExecutor(max_workers=4) as executor: + """Find and save the components from the soup object.""" + folder = os.path.join(pagefolder, category) + if not os.path.exists(folder): + os.mkdir(folder) + with ThreadPoolExecutor(max_workers=10) as executor: futures = [] - for res in tqdm(elements, desc=f"Downloading {tag2find}"): - if not res.has_attr(inner): - continue - filename = re.sub(r'\W+', '.', os.path.basename(res[inner])) - if tag2find == 'link' and (not any(ext in filename for ext in self.linkType)): - filename += '.html' - fileurl = urljoin(url, res.get(inner)) - filepath = os.path.join(pagefolder, filename) - res[inner] = os.path.join(os.path.basename(pagefolder), filename) - if not os.path.isfile(filepath): - print(f"Downloading {fileurl} to {filepath}") # Debug statement - futures.append(executor.submit(self._download_file, fileurl, filepath)) - for future in futures: - if future.result(): - self.summary[category] += 1 - print(f"Completed downloading {tag2find} elements. Total: {self.summary[category]}") # Debug statement + for tag in self.soup.find_all(tag2find): + try: + turl = tag.get(inner) + if turl is None: + continue + turl = turl.split('?')[0] + filename = os.path.basename(turl).strip().replace(" ", "_") + if len(filename) > 25: + filename = filename[-25:] + savepath = os.path.join(folder, filename) + if not turl.startswith("http"): + turl = urljoin(url, turl) + futures.append(executor.submit(self._download_file, turl, savepath, category)) + except Exception as e: + print(f"> _soupfindnSave(): Inner exception: {str(e)}") + for future in tqdm(futures, desc=f"Downloading {category}"): + try: + future.result() + except Exception as e: + print(f"> _soupfindnSave(): Future exception: {str(e)}") + + def _download_file(self, url, savepath, category): + """Download a file from a URL to a local path.""" + try: + headers = {"User-Agent": "Mozilla/5.0"} + response = self.session.get(url, headers=headers, stream=True, auth=self.auth) + response.raise_for_status() + if self.file_size_limit and int(response.headers.get('content-length', 0)) > self.file_size_limit: + print(f"Skipping {url} due to size limit.") + return + with open(savepath, 'wb') as file: + for chunk in response.iter_content(1024): + file.write(chunk) + self.summary[category] += 1 + except Exception as e: + print(f"> _download_file(): Download error for {url}: {str(e)}") From cb6ac1b2d13fb42039a81378cc04734ca0a1efaf Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:46:01 +0530 Subject: [PATCH 52/66] Update auth.py --- auth.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/auth.py b/auth.py index a4ea522..7f55383 100644 --- a/auth.py +++ b/auth.py @@ -1,5 +1,3 @@ -# auth.py - import os import json @@ -17,7 +15,7 @@ def save_auth_data(data): def add_credentials(user_id, username, password): data = load_auth_data() - data[user_id] = {'username': username, 'password': password} + data[str(user_id)] = 
{'username': username, 'password': password} save_auth_data(data) def get_credentials(user_id): @@ -29,3 +27,5 @@ def remove_credentials(user_id): if str(user_id) in data: del data[str(user_id)] save_auth_data(data) + return True + return False From 61c55d7cebe960e13e7adfcd6d19daa1bf25be67 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:51:35 +0530 Subject: [PATCH 53/66] Update main.py --- main.py | 41 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/main.py b/main.py index 4d04155..8670917 100644 --- a/main.py +++ b/main.py @@ -88,14 +88,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"h|{url}"), - InlineKeyboardButton("CSS", callback_data=f"c|{url}"), - InlineKeyboardButton("Images", callback_data=f"i|{url}") + InlineKeyboardButton("HTML", callback_data=f"h|{url[:50]}"), + InlineKeyboardButton("CSS", callback_data=f"c|{url[:50]}"), + InlineKeyboardButton("Images", callback_data=f"i|{url[:50]}") ], [ - InlineKeyboardButton("XML", callback_data=f"x|{url}"), - InlineKeyboardButton("Video", callback_data=f"v|{url}"), - InlineKeyboardButton("JS", callback_data=f"j|{url}") + InlineKeyboardButton("XML", callback_data=f"x|{url[:50]}"), + InlineKeyboardButton("Video", callback_data=f"v|{url[:50]}"), + InlineKeyboardButton("JS", callback_data=f"j|{url[:50]}") ] ] ) @@ -122,23 +122,15 @@ async def callback_query_handler(bot, update: CallbackQuery): if not res: return await update.message.reply('Something went wrong!') + zip_filename = f"{name}.zip" shutil.make_archive(name, 'zip', base_dir=dir) - await update.message.reply_document(name+'.zip', caption=summary) + await update.message.reply_document(zip_filename, caption=summary) shutil.rmtree(dir) - os.remove(name+'.zip') + os.remove(zip_filename) print("Download completed successfully!") # Debug statement -def parse_components(text): - components = text.split() - imgFlg = 'img' in components - linkFlg = 'css' in components - scriptFlg = 'script' in components - videoFlg = 'video' in components - xmlFlg = 'xml' in components - return imgFlg, linkFlg, scriptFlg, videoFlg, xmlFlg - def is_valid_url(url): try: response = requests.head(url, timeout=5) @@ -146,19 +138,4 @@ def is_valid_url(url): except requests.RequestException: return False -async def send_progress(msg, chat_id, initial_text): - try: - for i in range(10): - await asyncio.sleep(1) - try: - await Bot.edit_message_text(chat_id=chat_id, message_id=msg.id, text=f"{initial_text}\nProgress: {i*10}%") - except Exception as e: - if "MESSAGE_ID_INVALID" in str(e): - print(f"Message ID invalid: {e}", file=sys.stderr) - break - print(f"Error updating progress: {e}", file=sys.stderr) - continue - except Exception as e: - print(f"Error in send_progress loop: {e}", file=sys.stderr) - Bot.run() From 5dac34cdbf7ed4e7cfaaa1aa27dbb24abab27ae4 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 11:52:13 +0530 Subject: [PATCH 54/66] Update web_dl.py From 84fd81f4c87ebcbb13de156147d30131ef3d2f74 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 12:06:31 +0530 Subject: [PATCH 55/66] Update web_dl.py --- web_dl.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/web_dl.py b/web_dl.py index b770a8b..54caf38 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,14 
+1,12 @@ import os import re -import sys import requests from urllib.parse import urljoin from bs4 import BeautifulSoup from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor -class urlDownloader(object): - """Download the webpage components based on the input URL.""" +class urlDownloader: def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg @@ -31,7 +29,6 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xml } def savePage(self, url, pagefolder='page'): - """Save the web page components based on the input URL and dir name.""" try: response = self.session.get(url, auth=self.auth) response.raise_for_status() @@ -58,7 +55,6 @@ def savePage(self, url, pagefolder='page'): return False, str(e) def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'): - """Find and save the components from the soup object.""" folder = os.path.join(pagefolder, category) if not os.path.exists(folder): os.mkdir(folder) @@ -86,7 +82,6 @@ def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category= print(f"> _soupfindnSave(): Future exception: {str(e)}") def _download_file(self, url, savepath, category): - """Download a file from a URL to a local path.""" try: headers = {"User-Agent": "Mozilla/5.0"} response = self.session.get(url, headers=headers, stream=True, auth=self.auth) From c8e4b7f784d65452744eeab88a62b697b0af77b6 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 12:07:07 +0530 Subject: [PATCH 56/66] Update main.py --- main.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index 8670917..3264f37 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,4 @@ import os -import sys import shutil import requests from pyrogram import Client, filters @@ -39,7 +38,7 @@ ]] ) -@Bot.on_message(filters.command(["start"])) +@Bot.on_message(filters.command("start")) async def start(bot, update): text = START_TXT.format(update.from_user.mention) reply_markup = START_BTN @@ -49,7 +48,7 @@ async def start(bot, update): reply_markup=reply_markup ) -@Bot.on_message(filters.command(["auth"])) +@Bot.on_message(filters.command("auth")) async def auth(bot, update): if len(update.command) != 2 or ':' not in update.command[1]: return await update.reply_text("Please send your username and password in the format 'username:password'") @@ -58,7 +57,7 @@ async def auth(bot, update): add_credentials(update.from_user.id, username, password) await update.reply_text("Credentials saved successfully.") -@Bot.on_message(filters.command(["remove_auth"])) +@Bot.on_message(filters.command("remove_auth")) async def remove_auth(bot, update): success = remove_credentials(update.from_user.id) if success: @@ -66,7 +65,7 @@ async def remove_auth(bot, update): else: await update.reply_text("No credentials found to remove.") -@Bot.on_message(filters.command(["view_auth"])) +@Bot.on_message(filters.command("view_auth")) async def view_auth(bot, update): creds = get_credentials(update.from_user.id) if creds: @@ -74,7 +73,7 @@ async def view_auth(bot, update): else: await update.reply_text("No credentials found.") -@Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth|/remove_auth|/view_auth')) +@Bot.on_message(filters.private & filters.text & ~filters.command(["start", "auth", "remove_auth", 
"view_auth"])) async def webdl(_, m): url = m.text.strip() @@ -88,14 +87,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"h|{url[:50]}"), - InlineKeyboardButton("CSS", callback_data=f"c|{url[:50]}"), - InlineKeyboardButton("Images", callback_data=f"i|{url[:50]}") + InlineKeyboardButton("HTML", callback_data=f"h|{url}"), + InlineKeyboardButton("CSS", callback_data=f"c|{url}"), + InlineKeyboardButton("Images", callback_data=f"i|{url}") ], [ - InlineKeyboardButton("XML", callback_data=f"x|{url[:50]}"), - InlineKeyboardButton("Video", callback_data=f"v|{url[:50]}"), - InlineKeyboardButton("JS", callback_data=f"j|{url[:50]}") + InlineKeyboardButton("XML", callback_data=f"x|{url}"), + InlineKeyboardButton("Video", callback_data=f"v|{url}"), + InlineKeyboardButton("JS", callback_data=f"j|{url}") ] ] ) From 59df0d52c8e358fd931d10b2d19d5b76f9b464d0 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 15:03:03 +0530 Subject: [PATCH 57/66] Update main.py --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 3264f37..39ea77d 100644 --- a/main.py +++ b/main.py @@ -110,13 +110,14 @@ async def callback_query_handler(bot, update: CallbackQuery): scriptFlg = component == 'j' videoFlg = component == 'v' xmlFlg = component == 'x' + htmlFlg = component == 'h' name = dir = str(update.message.chat.id) if not os.path.isdir(dir): os.makedirs(dir) auth = get_credentials(update.from_user.id) - obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, file_size_limit=10*1024*1024, auth=auth) + obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, htmlFlg=htmlFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: return await update.message.reply('Something went wrong!') From faf4e344914191f4ca6ade06c23c5d3eab92ca78 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 15:04:07 +0530 Subject: [PATCH 58/66] Update web_dl.py --- web_dl.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/web_dl.py b/web_dl.py index 54caf38..c96b92d 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,5 +1,4 @@ import os -import re import requests from urllib.parse import urljoin from bs4 import BeautifulSoup @@ -7,13 +6,14 @@ from concurrent.futures import ThreadPoolExecutor class urlDownloader: - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, htmlFlg=False, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg self.videoFlg = videoFlg self.xmlFlg = xmlFlg + self.htmlFlg = htmlFlg self.file_size_limit = file_size_limit self.max_retries = max_retries self.auth = auth @@ -25,7 +25,8 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xml 'links': 0, 'scripts': 0, 'videos': 0, - 'xmls': 0 + 'xmls': 0, + 'htmls': 0 } def savePage(self, url, pagefolder='page'): @@ -45,10 +46,11 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='video', inner='src', category='videos') if self.xmlFlg: 
self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') - with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: - file.write(self.soup.prettify('utf-8')) - summary = (f"Downloaded: {self.summary['images']} images, {self.summary['links']} links, " - f"{self.summary['scripts']} scripts, {self.summary['videos']} videos, {self.summary['xmls']} xmls.") + if self.htmlFlg: + with open(os.path.join(pagefolder, 'page.html'), 'wb') as file: + file.write(self.soup.prettify('utf-8')) + self.summary['htmls'] += 1 + summary = self.generate_summary() return True, summary except Exception as e: print(f"> savePage(): Create page error: {str(e)}") @@ -95,3 +97,7 @@ def _download_file(self, url, savepath, category): self.summary[category] += 1 except Exception as e: print(f"> _download_file(): Download error for {url}: {str(e)}") + + def generate_summary(self): + components = [f"{count} {name}" for name, count in self.summary.items() if count > 0] + return f"Downloaded: {', '.join(components)}." From c1b72e63d36cf8e6cbcee6d204af95c3c040e5d4 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 15:11:52 +0530 Subject: [PATCH 59/66] Update main.py --- main.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 39ea77d..8d20463 100644 --- a/main.py +++ b/main.py @@ -27,8 +27,8 @@ Send any URL, optionally with the components you want to download. For example: 'https://www.google.com img,css,script' -Use /auth username:password to add your authentication credentials. -Use /remove_auth to remove your authentication credentials. +Use /auth website_url username:password to add your authentication credentials. +Use /remove_auth website_url to remove your authentication credentials. Use /view_auth to view your stored authentication credentials. 
""" @@ -50,16 +50,21 @@ async def start(bot, update): @Bot.on_message(filters.command("auth")) async def auth(bot, update): - if len(update.command) != 2 or ':' not in update.command[1]: - return await update.reply_text("Please send your username and password in the format 'username:password'") + if len(update.command) != 3 or ':' not in update.command[2]: + return await update.reply_text("Please send your website URL and credentials in the format 'website_url username:password'") - username, password = update.command[1].split(":", 1) - add_credentials(update.from_user.id, username, password) + website, credentials = update.command[1], update.command[2] + username, password = credentials.split(":", 1) + add_credentials(update.from_user.id, website, username, password) await update.reply_text("Credentials saved successfully.") @Bot.on_message(filters.command("remove_auth")) async def remove_auth(bot, update): - success = remove_credentials(update.from_user.id) + if len(update.command) != 2: + return await update.reply_text("Please send the website URL in the format 'website_url'") + + website = update.command[1] + success = remove_credentials(update.from_user.id, website) if success: await update.reply_text("Credentials removed successfully.") else: @@ -69,7 +74,8 @@ async def remove_auth(bot, update): async def view_auth(bot, update): creds = get_credentials(update.from_user.id) if creds: - await update.reply_text(f"Your credentials:\nUsername: {creds['username']}\nPassword: {creds['password']}") + cred_list = [f"Website: {website}\nUsername: {details['username']}\nPassword: {details['password']}" for website, details in creds.items()] + await update.reply_text("\n\n".join(cred_list)) else: await update.reply_text("No credentials found.") From 07f11010e0bae5ef23f0387948f983e633063779 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 15:13:16 +0530 Subject: [PATCH 60/66] Update auth.py --- auth.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/auth.py b/auth.py index 7f55383..4e9ded9 100644 --- a/auth.py +++ b/auth.py @@ -13,19 +13,21 @@ def save_auth_data(data): with open(AUTH_FILE, 'w') as file: json.dump(data, file, indent=4) -def add_credentials(user_id, username, password): +def add_credentials(user_id, website, username, password): data = load_auth_data() - data[str(user_id)] = {'username': username, 'password': password} + if str(user_id) not in data: + data[str(user_id)] = {} + data[str(user_id)][website] = {'username': username, 'password': password} save_auth_data(data) def get_credentials(user_id): data = load_auth_data() - return data.get(str(user_id)) + return data.get(str(user_id), {}) -def remove_credentials(user_id): +def remove_credentials(user_id, website): data = load_auth_data() - if str(user_id) in data: - del data[str(user_id)] + if str(user_id) in data and website in data[str(user_id)]: + del data[str(user_id)][website] save_auth_data(data) return True return False From a27a4017bc08fc0f6b589f3fa0309214a982a90d Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 15:14:11 +0530 Subject: [PATCH 61/66] Update web_dl.py From bd11e2843b2388dcce9a42469e6083a930d2f1f6 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 16:18:00 +0530 Subject: [PATCH 62/66] Update main.py --- main.py | 21 ++++++++++++++++----- 1 file changed, 16 
insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 8d20463..aa63dc1 100644 --- a/main.py +++ b/main.py @@ -139,9 +139,20 @@ async def callback_query_handler(bot, update: CallbackQuery): def is_valid_url(url): try: - response = requests.head(url, timeout=5) - return response.status_code == 200 - except requests.RequestException: - return False - + headers = {"User-Agent": "Mozilla/5.0"} + response = requests.head(url, headers=headers, timeout=10, allow_redirects=True) + if response.status_code == 200: + return True + print(f"HEAD request failed with status code: {response.status_code}") + print(f"Response headers: {response.headers}") + # Fallback to GET request if HEAD fails + response = requests.get(url, headers=headers, timeout=10, allow_redirects=True) + if response.status_code == 200: + return True + print(f"GET request failed with status code: {response.status_code}") + print(f"Response headers: {response.headers}") + except requests.RequestException as e: + print(f"Request exception: {e}") + return False + Bot.run() From 41dff1a89880dcdddfc25c28ea11c75018fc7b59 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 16:23:36 +0530 Subject: [PATCH 63/66] Update main.py --- main.py | 67 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/main.py b/main.py index aa63dc1..8075859 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,6 @@ from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery from web_dl import urlDownloader from auth import add_credentials, get_credentials, remove_credentials -import asyncio # Bot configuration using environment variables BOT_TOKEN = os.environ.get("BOT_TOKEN") @@ -27,8 +26,8 @@ Send any URL, optionally with the components you want to download. For example: 'https://www.google.com img,css,script' -Use /auth website_url username:password to add your authentication credentials. -Use /remove_auth website_url to remove your authentication credentials. +Use /auth username:password to add your authentication credentials. +Use /remove_auth to remove your authentication credentials. Use /view_auth to view your stored authentication credentials. 
""" @@ -38,7 +37,7 @@ ]] ) -@Bot.on_message(filters.command("start")) +@Bot.on_message(filters.command(["start"])) async def start(bot, update): text = START_TXT.format(update.from_user.mention) reply_markup = START_BTN @@ -48,38 +47,32 @@ async def start(bot, update): reply_markup=reply_markup ) -@Bot.on_message(filters.command("auth")) +@Bot.on_message(filters.command(["auth"])) async def auth(bot, update): - if len(update.command) != 3 or ':' not in update.command[2]: - return await update.reply_text("Please send your website URL and credentials in the format 'website_url username:password'") + if len(update.command) != 2 or ':' not in update.command[1]: + return await update.reply_text("Please send your username and password in the format 'username:password'") - website, credentials = update.command[1], update.command[2] - username, password = credentials.split(":", 1) - add_credentials(update.from_user.id, website, username, password) + username, password = update.command[1].split(":", 1) + add_credentials(update.from_user.id, username, password) await update.reply_text("Credentials saved successfully.") -@Bot.on_message(filters.command("remove_auth")) +@Bot.on_message(filters.command(["remove_auth"])) async def remove_auth(bot, update): - if len(update.command) != 2: - return await update.reply_text("Please send the website URL in the format 'website_url'") - - website = update.command[1] - success = remove_credentials(update.from_user.id, website) + success = remove_credentials(update.from_user.id) if success: await update.reply_text("Credentials removed successfully.") else: await update.reply_text("No credentials found to remove.") -@Bot.on_message(filters.command("view_auth")) +@Bot.on_message(filters.command(["view_auth"])) async def view_auth(bot, update): creds = get_credentials(update.from_user.id) if creds: - cred_list = [f"Website: {website}\nUsername: {details['username']}\nPassword: {details['password']}" for website, details in creds.items()] - await update.reply_text("\n\n".join(cred_list)) + await update.reply_text(f"Your credentials:\nUsername: {creds['username']}\nPassword: {creds['password']}") else: await update.reply_text("No credentials found.") -@Bot.on_message(filters.private & filters.text & ~filters.command(["start", "auth", "remove_auth", "view_auth"])) +@Bot.on_message(filters.private & filters.text & ~filters.regex('/start|/auth|/remove_auth|/view_auth')) async def webdl(_, m): url = m.text.strip() @@ -93,14 +86,14 @@ async def webdl(_, m): keyboard = InlineKeyboardMarkup( [ [ - InlineKeyboardButton("HTML", callback_data=f"h|{url}"), - InlineKeyboardButton("CSS", callback_data=f"c|{url}"), - InlineKeyboardButton("Images", callback_data=f"i|{url}") + InlineKeyboardButton("HTML", callback_data=f"h|{url[:50]}"), + InlineKeyboardButton("CSS", callback_data=f"c|{url[:50]}"), + InlineKeyboardButton("Images", callback_data=f"i|{url[:50]}") ], [ - InlineKeyboardButton("XML", callback_data=f"x|{url}"), - InlineKeyboardButton("Video", callback_data=f"v|{url}"), - InlineKeyboardButton("JS", callback_data=f"j|{url}") + InlineKeyboardButton("XML", callback_data=f"x|{url[:50]}"), + InlineKeyboardButton("Video", callback_data=f"v|{url[:50]}"), + InlineKeyboardButton("JS", callback_data=f"j|{url[:50]}") ] ] ) @@ -116,24 +109,34 @@ async def callback_query_handler(bot, update: CallbackQuery): scriptFlg = component == 'j' videoFlg = component == 'v' xmlFlg = component == 'x' - htmlFlg = component == 'h' name = dir = str(update.message.chat.id) if not os.path.isdir(dir): 
         os.makedirs(dir)
 
     auth = get_credentials(update.from_user.id)
-    obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, htmlFlg=htmlFlg, file_size_limit=10*1024*1024, auth=auth)
+    obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, file_size_limit=10*1024*1024, auth=auth)
     res, summary = obj.savePage(url, dir)
     if not res:
         return await update.message.reply('Something went wrong!')
 
     zip_filename = f"{name}.zip"
     shutil.make_archive(name, 'zip', base_dir=dir)
-    await update.message.reply_document(zip_filename, caption=summary)
-    shutil.rmtree(dir)
-    os.remove(zip_filename)
+    try:
+        await update.message.reply_document(zip_filename, caption=summary)
+    except Exception as e:
+        print(f"Failed to send document: {e}")
+
+    try:
+        shutil.rmtree(dir)
+    except Exception as e:
+        print(f"Failed to remove directory {dir}: {e}")
+
+    try:
+        os.remove(zip_filename)
+    except Exception as e:
+        print(f"Failed to remove zip file {zip_filename}: {e}")
 
     print("Download completed successfully!")  # Debug statement
 
@@ -154,5 +157,5 @@ def is_valid_url(url):
     except requests.RequestException as e:
         print(f"Request exception: {e}")
     return False
-    
+
 Bot.run()
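Note: the url[:50] truncation in the buttons above exists because Telegram caps callback_data at 64 bytes, but truncation silently corrupts longer URLs before they reach the callback handler. A hypothetical alternative (none of these names appear in the patch) keeps a short token in the button and the full URL in memory:

    import uuid

    URL_CACHE = {}  # token -> full URL; illustrative in-memory store

    def make_callback_data(component, url):
        token = uuid.uuid4().hex[:16]   # 18 bytes with prefix and pipe, well under 64
        URL_CACHE[token] = url
        return f"{component}|{token}"

    def resolve_callback_data(data):
        component, token = data.split("|", 1)
        return component, URL_CACHE[token]  # KeyError if the bot restarted meanwhile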
From 7fb50aa96510a822fe7cc8fd970a321a9606be53 Mon Sep 17 00:00:00 2001
From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com>
Date: Sat, 25 May 2024 16:24:06 +0530
Subject: [PATCH 64/66] Update web_dl.py

---
 web_dl.py | 87 +++++++++++++++++++++++++------------------------------
 1 file changed, 39 insertions(+), 48 deletions(-)

diff --git a/web_dl.py b/web_dl.py
index c96b92d..31a2bb4 100644
--- a/web_dl.py
+++ b/web_dl.py
@@ -1,19 +1,20 @@
 import os
+import re
 import requests
 from urllib.parse import urljoin
 from bs4 import BeautifulSoup
 from tqdm import tqdm
 from concurrent.futures import ThreadPoolExecutor
 
-class urlDownloader:
-    def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, htmlFlg=False, file_size_limit=None, max_retries=3, auth=None):
+class urlDownloader(object):
+    """Download the webpage components based on the input URL."""
+    def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None):
         self.soup = None
         self.imgFlg = imgFlg
         self.linkFlg = linkFlg
         self.scriptFlg = scriptFlg
         self.videoFlg = videoFlg
         self.xmlFlg = xmlFlg
-        self.htmlFlg = htmlFlg
         self.file_size_limit = file_size_limit
         self.max_retries = max_retries
         self.auth = auth
@@ -25,11 +26,11 @@ def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xml
             'links': 0,
             'scripts': 0,
             'videos': 0,
-            'xmls': 0,
-            'htmls': 0
+            'xmls': 0
         }
 
     def savePage(self, url, pagefolder='page'):
+        """Save the web page components based on the input URL and dir name."""
         try:
             response = self.session.get(url, auth=self.auth)
             response.raise_for_status()
@@ -46,58 +47,48 @@ def savePage(self, url, pagefolder='page'):
                 self._soupfindnSave(url, pagefolder, tag2find='video', inner='src', category='videos')
             if self.xmlFlg:
                 self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls')
-            if self.htmlFlg:
-                with open(os.path.join(pagefolder, 'page.html'), 'wb') as file:
-                    file.write(self.soup.prettify('utf-8'))
-                self.summary['htmls'] += 1
-            summary = self.generate_summary()
-            return True, summary
+            with open(os.path.join(pagefolder, 'index.html'), 'w', encoding='utf-8') as f:
+                f.write(self.soup.prettify())
+            summary_text = "\n".join([f"{k}: {v}" for k, v in self.summary.items()])
+            return True, summary_text
         except Exception as e:
-            print(f"> savePage(): Create page error: {str(e)}")
-            return False, str(e)
+            print(f"Error saving page: {e}")
+            return False, ""
 
     def _soupfindnSave(self, url, pagefolder, tag2find='img', inner='src', category='images'):
+        """Find and save specific elements in the soup."""
+        tags = self.soup.find_all(tag2find)
+        print(f"Found {len(tags)} {category} tags")  # Debug statement
+        urls = [tag.get(inner) for tag in tags]
+        urls = [urljoin(url, u) for u in urls]
+        urls = list(set(urls))
+        self.summary[category] += len(urls)
         folder = os.path.join(pagefolder, category)
         if not os.path.exists(folder):
            os.mkdir(folder)
         with ThreadPoolExecutor(max_workers=10) as executor:
-            futures = []
-            for tag in self.soup.find_all(tag2find):
-                try:
-                    turl = tag.get(inner)
-                    if turl is None:
-                        continue
-                    turl = turl.split('?')[0]
-                    filename = os.path.basename(turl).strip().replace(" ", "_")
-                    if len(filename) > 25:
-                        filename = filename[-25:]
-                    savepath = os.path.join(folder, filename)
-                    if not turl.startswith("http"):
-                        turl = urljoin(url, turl)
-                    futures.append(executor.submit(self._download_file, turl, savepath, category))
-                except Exception as e:
-                    print(f"> _soupfindnSave(): Inner exception: {str(e)}")
-            for future in tqdm(futures, desc=f"Downloading {category}"):
-                try:
-                    future.result()
-                except Exception as e:
-                    print(f"> _soupfindnSave(): Future exception: {str(e)}")
+            for u in urls:
+                executor.submit(self._savefile, folder, u)
 
-    def _download_file(self, url, savepath, category):
+    def _savefile(self, folder, fileurl):
+        """Save the file content from the URL to the given folder."""
+        if not fileurl:
+            return
+        name = re.sub(r'\W+', '', os.path.basename(fileurl))
+        filename = os.path.join(folder, name)
+        print(f"Downloading {fileurl} to {filename}")  # Debug statement
         try:
-            headers = {"User-Agent": "Mozilla/5.0"}
-            response = self.session.get(url, headers=headers, stream=True, auth=self.auth)
+            response = self.session.get(fileurl, stream=True, auth=self.auth)
             response.raise_for_status()
-            if self.file_size_limit and int(response.headers.get('content-length', 0)) > self.file_size_limit:
-                print(f"Skipping {url} due to size limit.")
+            content_length = response.headers.get('Content-Length')
+            if content_length and self.file_size_limit and int(content_length) > self.file_size_limit:
+                print(f"Skipping {fileurl}, file size {content_length} exceeds limit {self.file_size_limit}")
                 return
-            with open(savepath, 'wb') as file:
-                for chunk in response.iter_content(1024):
-                    file.write(chunk)
-            self.summary[category] += 1
+            with open(filename, 'wb') as f:
+                for chunk in tqdm(response.iter_content(chunk_size=1024)):
+                    if chunk:
+                        f.write(chunk)
         except Exception as e:
-            print(f"> _download_file(): Download error for {url}: {str(e)}")
-
-    def generate_summary(self):
-        components = [f"{count} {name}" for name, count in self.summary.items() if count > 0]
-        return f"Downloaded: {', '.join(components)}."
+            print(f"Error downloading {fileurl}: {e}")
+            if os.path.exists(filename):
+                os.remove(filename)
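Note: in the rewritten _soupfindnSave above, executor.submit() results are never collected, so any exception that escapes _savefile disappears silently. A sketch of one way to surface them; download_all is a hypothetical helper, not part of the patch:

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def download_all(downloader, folder, urls, max_workers=10):
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {executor.submit(downloader._savefile, folder, u): u for u in urls}
            for future in as_completed(futures):
                try:
                    future.result()  # re-raises anything _savefile did not catch itself
                except Exception as exc:
                    print(f"{futures[future]} failed: {exc}")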
+ print(f"Error downloading {fileurl}: {e}") + if os.path.exists(filename): + os.remove(filename) From f3d4e0552b96e087bc0ddb1d99d43575c5bd1379 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 16:31:01 +0530 Subject: [PATCH 65/66] Update main.py --- main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 8075859..ed520d5 100644 --- a/main.py +++ b/main.py @@ -109,13 +109,14 @@ async def callback_query_handler(bot, update: CallbackQuery): scriptFlg = component == 'j' videoFlg = component == 'v' xmlFlg = component == 'x' + htmlFlg = component == 'h' # Adding HTML flag here name = dir = str(update.message.chat.id) if not os.path.isdir(dir): os.makedirs(dir) auth = get_credentials(update.from_user.id) - obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, file_size_limit=10*1024*1024, auth=auth) + obj = urlDownloader(imgFlg=imgFlg, linkFlg=linkFlg, scriptFlg=scriptFlg, videoFlg=videoFlg, xmlFlg=xmlFlg, htmlFlg=htmlFlg, file_size_limit=10*1024*1024, auth=auth) res, summary = obj.savePage(url, dir) if not res: return await update.message.reply('Something went wrong!') From 208bd25f918280fb64c68d68857dc5273d05f154 Mon Sep 17 00:00:00 2001 From: Md Matin Ashraf <91468485+Ashrafmdmatin41@users.noreply.github.com> Date: Sat, 25 May 2024 16:31:37 +0530 Subject: [PATCH 66/66] Update web_dl.py --- web_dl.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/web_dl.py b/web_dl.py index 31a2bb4..81445d8 100644 --- a/web_dl.py +++ b/web_dl.py @@ -1,5 +1,6 @@ import os import re +import sys import requests from urllib.parse import urljoin from bs4 import BeautifulSoup @@ -8,26 +9,28 @@ class urlDownloader(object): """Download the webpage components based on the input URL.""" - def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, file_size_limit=None, max_retries=3, auth=None): + def __init__(self, imgFlg=True, linkFlg=True, scriptFlg=True, videoFlg=True, xmlFlg=True, htmlFlg=True, file_size_limit=None, max_retries=3, auth=None): self.soup = None self.imgFlg = imgFlg self.linkFlg = linkFlg self.scriptFlg = scriptFlg self.videoFlg = videoFlg self.xmlFlg = xmlFlg + self.htmlFlg = htmlFlg self.file_size_limit = file_size_limit self.max_retries = max_retries self.auth = auth self.linkType = ('css', 'png', 'ico', 'jpg', 'jpeg', 'mov', 'ogg', 'gif', 'xml', 'js') self.videoType = ('mp4', 'webm', 'ogg') - self.session = requests.Session() self.summary = { 'images': 0, 'links': 0, 'scripts': 0, 'videos': 0, - 'xmls': 0 + 'xmls': 0, + 'htmls': 0 } + self.session = requests.Session() def savePage(self, url, pagefolder='page'): """Save the web page components based on the input URL and dir name.""" @@ -47,6 +50,8 @@ def savePage(self, url, pagefolder='page'): self._soupfindnSave(url, pagefolder, tag2find='video', inner='src', category='videos') if self.xmlFlg: self._soupfindnSave(url, pagefolder, tag2find='xml', inner='src', category='xmls') + if self.htmlFlg: + self._soupfindnSave(url, pagefolder, tag2find='html', inner='src', category='htmls') with open(os.path.join(pagefolder, 'index.html'), 'w', encoding='utf-8') as f: f.write(self.soup.prettify()) summary_text = "\n".join([f"{k}: {v}" for k, v in self.summary.items()])