Commit ab00088b authored by John's avatar John
Browse files

1.0 (not fully tested)

parent 22db7072
Loading
Loading
Loading
Loading
+17 −11
Original line number Diff line number Diff line
import shutil
import requests
import undetected_chromedriver as uc
from selenium.webdriver.chrome.options import Options
@@ -69,10 +70,8 @@ class SourceBase:
    def FetchImageUrls(self, force_redownload: bool = False):
        if self.url:
            LOGGER.info('Scraping website for images')
            #  set download path
            if not self.title:

                #  filter url for illegal characters
            if not self.title:
                _url = self.url.translate({ord(c): None for c in '\./:*?"<>|'})
                self.path = rf'{SOURCE_DOWNLOAD_PATH}\{_url}'

@@ -81,7 +80,6 @@ class SourceBase:

            path_to_page_num = rf'{self.path}\pages.txt'

            #  check if the files are already downloaded
            are_downloaded = False
            if not os.path.exists(self.path):
                os.makedirs(self.path)
@@ -90,17 +88,16 @@ class SourceBase:

            if are_downloaded is False:

                #  initialize webdriver
                options = Options()
                # options.add_argument("--headless")
                driver = uc.Chrome(options=options)

                #  load page and wait for cloudflare to pass
                #  wait for cloudflare to pass
                driver.get(self.url)
                time.sleep(10)
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                driver.close()

                #  find all images and filter them
                _elements = soup.find_all('img')
                urls = [img['src'] for img in _elements]
                images = [k for k in urls if 'https' in k]
@@ -124,19 +121,28 @@ class SourceBase:
                if not self.image_urls:
                    raise ImagesNotFoundInRequest(self.image_urls)

                #  download images
                self.WebsiteExceptions()
                LOGGER.info(self.image_urls)
                self.DownloadImages()

    def WebsiteExceptions(self):
        """Apply site-specific rewrites to the collected image URLs.

        If at least one entry of ``self.image_urls`` contains ``'nhentai'``,
        every entry has ``'https://t'`` replaced with ``'https://i'`` and
        ``'t.jpg'`` replaced with ``'.jpg'``.  Otherwise nothing is changed.
        The existing list object is updated in place.
        """
        links = self.image_urls
        if not any('nhentai' in link for link in links):
            return

        #  slice assignment keeps the same list object, as item assignment did
        links[:] = [
            link.replace('https://t', 'https://i').replace('t.jpg', '.jpg')
            for link in links
        ]
        self.image_urls = links

    def DownloadImages(self):
        """Download every URL in ``self.image_urls`` into ``self.path``.

        Images are saved in list order as zero-padded, 1-based page numbers
        (``001.jpg``, ``002.jpg``, ...).  Each URL is requested exactly once
        and streamed to disk in chunks, so a whole image is never held in
        memory.  A one second pause follows every download.
        """
        LOGGER.info('Downloading images')

        for n, url in enumerate(self.image_urls, start=1):
            #  the context manager releases the streamed connection when done
            with requests.get(url, stream=True) as response, \
                    open(rf'{self.path}\{n:03}.jpg', 'wb') as image:
                for chunk in response.iter_content(chunk_size=8192):
                    image.write(chunk)
            #  Avoid IP ban
            time.sleep(1)
+5 −18
Original line number Diff line number Diff line
@@ -21,8 +21,8 @@ from .stylewidgets import ImgtransProgressMessageBox
from .configpanel import ConfigPanel
from .misc import DLModuleConfig, ProgramConfig
from .imgtrans_proj import ProjImgTrans

from dl.textdetector import TextBlock
from dl.pagesources import SourceBase


class ModuleThread(QThread):
@@ -571,23 +571,10 @@ class DLManager(QObject):
        if self.translate_thread.isRunning():
            self.translate_thread.terminate()

    def source(self):
        """Fetch the configured source and load the resulting project.

        Reads the title, link and force-download flags from ``self.config``,
        logs them, runs a fresh ``SourceBase`` with those values and, if it
        reports a non-empty project path, loads that path into
        ``self.imgtrans_proj``.
        """
        cfg = self.config
        title = cfg.src_title_flag
        url = cfg.src_link_flag
        force_redownload = cfg.src_force_download_flag

        LOGGER.info(f'Force download set to {force_redownload}')
        LOGGER.info(f'Url set to {url}')
        LOGGER.info(f'Project title set to {title}')

        downloader = SourceBase()
        downloader.run(url=url, force_redownload=force_redownload, title=title)

        project_dir = downloader.ReturnFullPathToProject()
        LOGGER.info(project_dir)
        if not project_dir:
            #  nothing to load
            return
        self.imgtrans_proj.load(project_dir)


    def runImgtransPipeline(self):
        self.source()
    def runImgtransPipeline(self, menu):
        from .sourcemanager import SourceManager
        source = SourceManager(self.config, self.imgtrans_proj, menu)
        source.download_source()
        if self.imgtrans_proj.is_empty:
            LOGGER.info('proj file is empty, nothing to do')
            self.progress_msgbox.hide()
+1 −1
Original line number Diff line number Diff line
@@ -908,7 +908,7 @@ class MainWindow(FramelessWindow):
        if self.bottomBar.textblockChecker.isChecked():
            self.bottomBar.textblockChecker.click()
        self.postprocess_mt_toggle = False
        self.dl_manager.runImgtransPipeline()
        self.dl_manager.runImgtransPipeline(self)

    def on_transpanel_changed(self):
        self.canvas.editor_index = self.rightComicTransStackPanel.currentIndex()
+28 −0
Original line number Diff line number Diff line
from .mainwindow import MainWindow
from dl.pagesources import SourceBase
from utils.logger import logger as LOGGER
from .misc import ProgramConfig
from .imgtrans_proj import ProjImgTrans


class SourceManager(SourceBase):
    """``SourceBase`` wired to the program config and the main window.

    Holds the config object, the current project and the main window so that
    ``download_source`` can read the source flags and open the resulting
    project directory.
    """

    def __init__(self, config: ProgramConfig, imgtrans_proj: ProjImgTrans, menu: MainWindow, *args, **kwargs):
        """Initialise the base class, then keep the collaborators.

        Any extra positional and keyword arguments are forwarded unchanged to
        ``SourceBase.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.config_pnl = config
        self.imgtrans_proj = imgtrans_proj
        self.menu = menu

    def download_source(self):
        """Run the source download from the config flags and open the result.

        Reads the title, link and force-download flags from the config, logs
        them, calls ``run`` with those values and, if
        ``ReturnFullPathToProject`` yields a non-empty path, asks the main
        window to open that directory.
        """
        settings = self.config_pnl
        title = settings.src_title_flag
        url = settings.src_link_flag
        force_redownload = settings.src_force_download_flag

        LOGGER.info(f'Force download set to {force_redownload}')
        LOGGER.info(f'Url set to {url}')
        LOGGER.info(f'Project title set to {title}')

        self.run(url=url, force_redownload=force_redownload, title=title)

        project_dir = self.ReturnFullPathToProject()
        LOGGER.info(f'Project path set to {project_dir}')
        if not project_dir:
            #  no project path reported, so there is nothing to open
            return
        self.menu.openDir(project_dir)