Loading ballontranslator/dl/pagesources/__init__.py +17 −11 Original line number Diff line number Diff line import shutil import requests import undetected_chromedriver as uc from selenium.webdriver.chrome.options import Options Loading Loading @@ -69,10 +70,8 @@ class SourceBase: def FetchImageUrls(self, force_redownload: bool = False): if self.url: LOGGER.info('Scraping website for images') # set download path if not self.title: # filter url for illegal characters if not self.title: _url = self.url.translate({ord(c): None for c in '\./:*?"<>|'}) self.path = rf'{SOURCE_DOWNLOAD_PATH}\{_url}' Loading @@ -81,7 +80,6 @@ class SourceBase: path_to_page_num = rf'{self.path}\pages.txt' # check if the files are already downloaded are_downloaded = False if not os.path.exists(self.path): os.makedirs(self.path) Loading @@ -90,17 +88,16 @@ class SourceBase: if are_downloaded is False: # initialize webdriver options = Options() # options.add_argument("--headless") driver = uc.Chrome(options=options) # load page and wait for cloudflare to pass # wait for cloudflare to pass driver.get(self.url) time.sleep(10) soup = BeautifulSoup(driver.page_source, 'html.parser') driver.close() # find all images and filter them _elements = soup.find_all('img') urls = [img['src'] for img in _elements] images = [k for k in urls if 'https' in k] Loading @@ -124,19 +121,28 @@ class SourceBase: if not self.image_urls: raise ImagesNotFoundInRequest(self.image_urls) # download images self.WebsiteExceptions() LOGGER.info(self.image_urls) self.DownloadImages() def WebsiteExceptions(self): urls = self.image_urls if any('nhentai' in k for k in urls): for i, s in enumerate(urls): urls[i] = s.replace('https://t', 'https://i').replace('t.jpg', '.jpg') self.image_urls = urls def DownloadImages(self): n = 1 LOGGER.info('Downloading images') for i in self.image_urls: img_data = requests.get(i).content img_data = requests.get(i, stream=True) with open(rf'{self.path}\{n:03}.jpg', 'wb') as image: 
image.write(img_data) shutil.copyfileobj(img_data.raw, image) n += 1 # Avoid IP ban time.sleep(1) Loading ballontranslator/ui/dl_manager.py +5 −18 Original line number Diff line number Diff line Loading @@ -21,8 +21,8 @@ from .stylewidgets import ImgtransProgressMessageBox from .configpanel import ConfigPanel from .misc import DLModuleConfig, ProgramConfig from .imgtrans_proj import ProjImgTrans from dl.textdetector import TextBlock from dl.pagesources import SourceBase class ModuleThread(QThread): Loading Loading @@ -571,23 +571,10 @@ class DLManager(QObject): if self.translate_thread.isRunning(): self.translate_thread.terminate() def source(self): title = self.config.src_title_flag url = self.config.src_link_flag force_redownload = self.config.src_force_download_flag LOGGER.info(f'Force download set to {force_redownload}') LOGGER.info(f'Url set to {url}') LOGGER.info(f'Project title set to {title}') manga = SourceBase() manga.run(url=url, force_redownload=force_redownload, title=title) proj_path = manga.ReturnFullPathToProject() LOGGER.info(proj_path) if proj_path: self.imgtrans_proj.load(proj_path) def runImgtransPipeline(self): self.source() def runImgtransPipeline(self, menu): from .sourcemanager import SourceManager source = SourceManager(self.config, self.imgtrans_proj, menu) source.download_source() if self.imgtrans_proj.is_empty: LOGGER.info('proj file is empty, nothing to do') self.progress_msgbox.hide() Loading ballontranslator/ui/mainwindow.py +1 −1 Original line number Diff line number Diff line Loading @@ -908,7 +908,7 @@ class MainWindow(FramelessWindow): if self.bottomBar.textblockChecker.isChecked(): self.bottomBar.textblockChecker.click() self.postprocess_mt_toggle = False self.dl_manager.runImgtransPipeline() self.dl_manager.runImgtransPipeline(self) def on_transpanel_changed(self): self.canvas.editor_index = self.rightComicTransStackPanel.currentIndex() Loading ballontranslator/ui/sourcemanager.py 0 → 100644 +28 −0 Original line number Diff line 
from .mainwindow import MainWindow
from dl.pagesources import SourceBase
from utils.logger import logger as LOGGER
from .misc import ProgramConfig
from .imgtrans_proj import ProjImgTrans


class SourceManager(SourceBase):
    """UI-side wrapper around ``SourceBase``.

    Takes the source settings (title, link, force-download flag) from the
    program config, hands them to the inherited ``run`` method, and then asks
    the main window to open the directory reported by
    ``ReturnFullPathToProject``.
    """

    def __init__(
        self,
        config: ProgramConfig,
        imgtrans_proj: ProjImgTrans,
        menu: MainWindow,
        *args,
        **kwargs,
    ):
        """Store the UI collaborators; extra arguments go to ``SourceBase``.

        Args:
            config: Program config providing ``src_title_flag``,
                ``src_link_flag`` and ``src_force_download_flag``.
            imgtrans_proj: Project object; only stored here, it is not used
                by any method of this class.
            menu: Main window whose ``openDir`` is called after a download.
        """
        super().__init__(*args, **kwargs)
        self.config_pnl = config
        self.imgtrans_proj = imgtrans_proj
        self.menu = menu

    def download_source(self):
        """Run the source download with the configured settings.

        Logs the three settings, calls ``run``, logs the resulting project
        path and, when that path is truthy, opens it through the main window.
        Nothing is opened when ``ReturnFullPathToProject`` yields a falsy
        value.
        """
        settings = self.config_pnl
        title = settings.src_title_flag
        url = settings.src_link_flag
        force_redownload = settings.src_force_download_flag

        LOGGER.info(f'Force download set to {force_redownload}')
        LOGGER.info(f'Url set to {url}')
        LOGGER.info(f'Project title set to {title}')

        self.run(url=url, force_redownload=force_redownload, title=title)

        proj_path = self.ReturnFullPathToProject()
        LOGGER.info(f'Project path set to {proj_path}')
        if not proj_path:
            return
        # NOTE(review): openDir presumably loads the project into the UI;
        # confirm against MainWindow.
        self.menu.openDir(proj_path)
ballontranslator/dl/pagesources/__init__.py +17 −11 Original line number Diff line number Diff line import shutil import requests import undetected_chromedriver as uc from selenium.webdriver.chrome.options import Options Loading Loading @@ -69,10 +70,8 @@ class SourceBase: def FetchImageUrls(self, force_redownload: bool = False): if self.url: LOGGER.info('Scraping website for images') # set download path if not self.title: # filter url for illegal characters if not self.title: _url = self.url.translate({ord(c): None for c in '\./:*?"<>|'}) self.path = rf'{SOURCE_DOWNLOAD_PATH}\{_url}' Loading @@ -81,7 +80,6 @@ class SourceBase: path_to_page_num = rf'{self.path}\pages.txt' # check if the files are already downloaded are_downloaded = False if not os.path.exists(self.path): os.makedirs(self.path) Loading @@ -90,17 +88,16 @@ class SourceBase: if are_downloaded is False: # initialize webdriver options = Options() # options.add_argument("--headless") driver = uc.Chrome(options=options) # load page and wait for cloudflare to pass # wait for cloudflare to pass driver.get(self.url) time.sleep(10) soup = BeautifulSoup(driver.page_source, 'html.parser') driver.close() # find all images and filter them _elements = soup.find_all('img') urls = [img['src'] for img in _elements] images = [k for k in urls if 'https' in k] Loading @@ -124,19 +121,28 @@ class SourceBase: if not self.image_urls: raise ImagesNotFoundInRequest(self.image_urls) # download images self.WebsiteExceptions() LOGGER.info(self.image_urls) self.DownloadImages() def WebsiteExceptions(self): urls = self.image_urls if any('nhentai' in k for k in urls): for i, s in enumerate(urls): urls[i] = s.replace('https://t', 'https://i').replace('t.jpg', '.jpg') self.image_urls = urls def DownloadImages(self): n = 1 LOGGER.info('Downloading images') for i in self.image_urls: img_data = requests.get(i).content img_data = requests.get(i, stream=True) with open(rf'{self.path}\{n:03}.jpg', 'wb') as image: image.write(img_data) 
shutil.copyfileobj(img_data.raw, image) n += 1 # Avoid IP ban time.sleep(1) Loading
ballontranslator/ui/dl_manager.py +5 −18 Original line number Diff line number Diff line Loading @@ -21,8 +21,8 @@ from .stylewidgets import ImgtransProgressMessageBox from .configpanel import ConfigPanel from .misc import DLModuleConfig, ProgramConfig from .imgtrans_proj import ProjImgTrans from dl.textdetector import TextBlock from dl.pagesources import SourceBase class ModuleThread(QThread): Loading Loading @@ -571,23 +571,10 @@ class DLManager(QObject): if self.translate_thread.isRunning(): self.translate_thread.terminate() def source(self): title = self.config.src_title_flag url = self.config.src_link_flag force_redownload = self.config.src_force_download_flag LOGGER.info(f'Force download set to {force_redownload}') LOGGER.info(f'Url set to {url}') LOGGER.info(f'Project title set to {title}') manga = SourceBase() manga.run(url=url, force_redownload=force_redownload, title=title) proj_path = manga.ReturnFullPathToProject() LOGGER.info(proj_path) if proj_path: self.imgtrans_proj.load(proj_path) def runImgtransPipeline(self): self.source() def runImgtransPipeline(self, menu): from .sourcemanager import SourceManager source = SourceManager(self.config, self.imgtrans_proj, menu) source.download_source() if self.imgtrans_proj.is_empty: LOGGER.info('proj file is empty, nothing to do') self.progress_msgbox.hide() Loading
ballontranslator/ui/mainwindow.py +1 −1 Original line number Diff line number Diff line Loading @@ -908,7 +908,7 @@ class MainWindow(FramelessWindow): if self.bottomBar.textblockChecker.isChecked(): self.bottomBar.textblockChecker.click() self.postprocess_mt_toggle = False self.dl_manager.runImgtransPipeline() self.dl_manager.runImgtransPipeline(self) def on_transpanel_changed(self): self.canvas.editor_index = self.rightComicTransStackPanel.currentIndex() Loading
# ballontranslator/ui/sourcemanager.py (new file in this change)
from .mainwindow import MainWindow
from dl.pagesources import SourceBase
from utils.logger import logger as LOGGER
from .misc import ProgramConfig
from .imgtrans_proj import ProjImgTrans


class SourceManager(SourceBase):
    """UI-side wrapper around ``SourceBase``.

    Takes the source settings (title, link, force-download flag) from the
    program config, hands them to the inherited ``run`` method, and then asks
    the main window to open the directory reported by
    ``ReturnFullPathToProject``.
    """

    def __init__(
        self,
        config: ProgramConfig,
        imgtrans_proj: ProjImgTrans,
        menu: MainWindow,
        *args,
        **kwargs,
    ):
        """Store the UI collaborators; extra arguments go to ``SourceBase``.

        Args:
            config: Program config providing ``src_title_flag``,
                ``src_link_flag`` and ``src_force_download_flag``.
            imgtrans_proj: Project object; only stored here, it is not used
                by any method of this class.
            menu: Main window whose ``openDir`` is called after a download.
        """
        super().__init__(*args, **kwargs)
        self.config_pnl = config
        self.imgtrans_proj = imgtrans_proj
        self.menu = menu

    def download_source(self):
        """Run the source download with the configured settings.

        Logs the three settings, calls ``run``, logs the resulting project
        path and, when that path is truthy, opens it through the main window.
        Nothing is opened when ``ReturnFullPathToProject`` yields a falsy
        value.
        """
        settings = self.config_pnl
        title = settings.src_title_flag
        url = settings.src_link_flag
        force_redownload = settings.src_force_download_flag

        LOGGER.info(f'Force download set to {force_redownload}')
        LOGGER.info(f'Url set to {url}')
        LOGGER.info(f'Project title set to {title}')

        self.run(url=url, force_redownload=force_redownload, title=title)

        proj_path = self.ReturnFullPathToProject()
        LOGGER.info(f'Project path set to {proj_path}')
        if not proj_path:
            return
        # NOTE(review): openDir presumably loads the project into the UI;
        # confirm against MainWindow.
        self.menu.openDir(proj_path)