Commit e77ea891 authored by John
Browse files

Integrated source downloads into the GUI

parent 956ebff1
Loading
Loading
Loading
Loading
+58 −47
Original line number Diff line number Diff line
import requests
import undetected_chromedriver as uc
from undetected_chromedriver import ChromeOptions
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import re
import time
from constants import SOURCE_DOWNLOAD_PATH
from exceptions import ImagesNotFoundInRequest, NotValidUrl
from .constants import SOURCE_DOWNLOAD_PATH
from .exceptions import ImagesNotFoundInRequest, NotValidUrl
from utils.logger import logger as LOGGER
import os


@@ -34,6 +35,13 @@ class SourceBase:
    def ReturnNumberOfPages(self) -> int:
        """Return the value currently stored in ``self.last_page_num``."""
        page_count = self.last_page_num
        return page_count

    def ReturnFullPathToProject(self) -> str:
        """Return the value currently stored in ``self.path``."""
        project_path = self.path
        return project_path

    def CheckLink(self):
        """Validate that ``self.url`` is an HTTPS link.

        The scheme has to be the prefix of the url. A plain substring test
        would also accept text that merely mentions ``https://`` somewhere
        inside it (for example as a query parameter of an ``http://`` link).
        Surrounding whitespace is ignored for the check and the scheme is
        compared case-insensitively.

        Raises:
            NotValidUrl: if ``self.url`` is not a string that starts with
                ``https://``.
        """
        url = self.url
        # Non-string values (e.g. None) are reported as an invalid url
        # instead of failing with a TypeError.
        is_https = (
            isinstance(url, str)
            and url.strip().lower().startswith('https://')
        )
        if not is_https:
            raise NotValidUrl(url)

    def CheckFiles(self, path):

        #  read known page number
@@ -59,7 +67,8 @@ class SourceBase:
            return False

    def FetchImageUrls(self, force_redownload: bool = False):

        if self.url:
            LOGGER.info('Scraping website for images')
            #  set download path
            if not self.title:

@@ -82,8 +91,8 @@ class SourceBase:
            if are_downloaded is False:

                #  initialize webdriver
            options = ChromeOptions()
            options.add_argument('headless')
                options = Options()
                # options.add_argument("--headless")
                driver = uc.Chrome(options=options)

                #  load page and wait for cloudflare to pass
@@ -112,11 +121,15 @@ class SourceBase:
                    except AttributeError:
                        pass

                if not self.image_urls:
                    raise ImagesNotFoundInRequest(self.image_urls)

                #  download images
                self.DownloadImages()

    def DownloadImages(self):
        n = 1
        LOGGER.info('Downloading images')

        for i in self.image_urls:
            img_data = requests.get(i).content
@@ -132,13 +145,11 @@ class SourceBase:

        self.SaveNumberOfPages(rf'{self.path}\pages.txt')

        LOGGER.info('Download complete')

    def run(self, url: str, force_redownload: bool, title: str = ''):
        """Configure this source and start fetching its images.

        Args:
            url: Link handed to ``SetUrl``.
            force_redownload: Passed through to ``FetchImageUrls``.
            title: Optional project title; ``SetTitle`` is only called when
                a non-empty value is given.
        """
        self.SetUrl(url)

        # A falsy title is skipped entirely, so SetTitle is never invoked
        # with an empty string.
        has_title = bool(title)
        if has_title:
            self.SetTitle(title)

        self.FetchImageUrls(force_redownload)

if __name__ == '__main__':
    # Manual entry point: when the module is executed directly, run one
    # download with a hard-coded url and title, without forcing a redownload.
    SourceBase().run(
        url='https://nhentai.net/g/444882/',
        force_redownload=False,
        title='Pog',
    )
+20 −4
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ class ConfigPanel(Widget):
        label_inpaint = self.tr('Inpaint')
        label_translator = self.tr('Translator')
        label_startup = self.tr('Startup')
        label_sources = self.tr('Sources')
        label_lettering = self.tr('Lettering')
        label_saladict = self.tr("SalaDict")
    
@@ -307,6 +308,7 @@ class ConfigPanel(Widget):
        ])
        generalTableItem.appendRows([
            TableItem(label_startup, CONFIG_FONTSIZE_TABLE),
            TableItem(label_sources, CONFIG_FONTSIZE_TABLE),
            TableItem(label_lettering, CONFIG_FONTSIZE_TABLE),
            TableItem(label_saladict, CONFIG_FONTSIZE_TABLE)
        ])
@@ -331,6 +333,14 @@ class ConfigPanel(Widget):
        self.open_on_startup_checker = generalConfigPanel.addCheckBox(self.tr('Reopen last project on startup'))
        self.open_on_startup_checker.stateChanged.connect(self.on_open_onstartup_changed)

        generalConfigPanel.addTextLabel(label_sources)
        self.src_link_textbox = generalConfigPanel.addLineEdit('Source url')
        self.src_link_textbox.textChanged.connect(self.on_source_link_changed)
        self.src_title_textbox = generalConfigPanel.addLineEdit('Title of your project')
        self.src_title_textbox.textChanged.connect(self.on_source_title_changed)
        self.src_force_download_checker = generalConfigPanel.addCheckBox(self.tr('Force download/redownload'))
        self.src_force_download_checker.stateChanged.connect(self.on_source_force_download_changed)

        generalConfigPanel.addTextLabel(label_lettering)
        dec_program_str = self.tr('decide by program')
        use_global_str = self.tr('use global setting')
@@ -441,11 +451,14 @@ class ConfigPanel(Widget):
    # def on_source_flag_changed(self):
    #     self.config.src_choice_flag = self.src_choice_combox.currentIndex()

    # def on_source_link_changed(self):
    #     self.config.src_link_flag = self.src_link_textbox.text()
    def on_source_link_changed(self):
        """Store the current text of ``src_link_textbox`` in the config."""
        current_link = self.src_link_textbox.text()
        self.config.src_link_flag = current_link

    def on_source_title_changed(self):
        """Store the current text of ``src_title_textbox`` in the config."""
        current_title = self.src_title_textbox.text()
        self.config.src_title_flag = current_title

    # def on_source_force_download_changed(self):
    #     self.config.src_force_download_flag = self.src_force_download_checker.isChecked()
    def on_source_force_download_changed(self):
        """Store the checked state of ``src_force_download_checker`` in the config."""
        force_enabled = self.src_force_download_checker.isChecked()
        self.config.src_force_download_flag = force_enabled

    def focusOnTranslator(self):
        idx0, idx1 = self.trans_sub_block.idx0, self.trans_sub_block.idx1
@@ -483,5 +496,8 @@ class ConfigPanel(Widget):
        self.let_uppercase_checker.setChecked(config.let_uppercase_flag)
        self.saladict_shortcut.setKeySequence(config.saladict_shortcut)
        self.searchurl_combobox.setCurrentText(config.search_url)
        self.src_force_download_checker.setChecked(config.src_force_download_flag)
        self.src_link_textbox.setText(config.src_link_flag)
        self.src_title_textbox.setText(config.src_title_flag)

        self.blockSignals(False)
 No newline at end of file
+16 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ from .configpanel import ConfigPanel
from .misc import DLModuleConfig, ProgramConfig
from .imgtrans_proj import ProjImgTrans
from dl.textdetector import TextBlock
from dl.pagesources import SourceBase


class ModuleThread(QThread):
@@ -569,7 +570,22 @@ class DLManager(QObject):
        if self.translate_thread.isRunning():
            self.translate_thread.terminate()

    def source(self):
        """Download the configured source and load it as the current project.

        Reads ``src_title_flag``, ``src_link_flag`` and
        ``src_force_download_flag`` from ``self.config``, runs a
        ``SourceBase`` with those values and hands the resulting project
        path to ``self.imgtrans_proj.load``.

        When no source url is configured the method returns early, so the
        project that is already loaded is left untouched.
        """
        title = self.config.src_title_flag
        url = self.config.src_link_flag
        force_redownload = self.config.src_force_download_flag
        LOGGER.info(f'Force download set to {force_redownload}')
        LOGGER.info(f'Url set to {url}')
        LOGGER.info(f'Project title set to {title}')

        # Without a url there is nothing to download; loading the path of a
        # fresh SourceBase would replace the current project with one that
        # was never fetched.
        if not url:
            LOGGER.info('No source url set, skipping source download')
            return

        # NOTE(review): run() is invoked synchronously here - confirm the
        # caller is fine with blocking until the download has finished.
        manga = SourceBase()
        manga.run(url=url, force_redownload=force_redownload, title=title)
        proj_path = manga.ReturnFullPathToProject()
        LOGGER.info(proj_path)
        self.imgtrans_proj.load(proj_path)


    def runImgtransPipeline(self):
        self.source()
        if self.imgtrans_proj.is_empty:
            LOGGER.info('proj file is empty, nothing to do')
            self.progress_msgbox.hide()
+4 −0
Original line number Diff line number Diff line
@@ -128,6 +128,10 @@ class MainWindow(FramelessWindow):
        self.bottomBar.paintmode_checkchanged.connect(self.setPaintMode)
        self.bottomBar.textblock_checkchanged.connect(self.setTextBlockMode)

        self.configPanel.src_title_textbox.setText(self.config.src_title_flag)
        self.configPanel.src_link_textbox.setText(self.config.src_link_flag)
        self.configPanel.src_force_download_checker.setChecked(self.config.src_force_download_flag)

        mainHLayout = QHBoxLayout()
        mainHLayout.addWidget(self.leftBar)
        mainHLayout.addWidget(self.centralStackWidget)
+6 −0
Original line number Diff line number Diff line
@@ -272,6 +272,9 @@ class ProgramConfig:
        gsearch_regex: bool = False,
        gsearch_range: int = 0,
        darkmode: bool = False,
        src_link_flag: str = '',
        src_force_download_flag: bool = False,
        src_title_flag: str = '',
        textselect_mini_menu: bool = True,
        saladict_shortcut: str = "Alt+S",
        search_url: str = "https://www.google.com/search?q=",
@@ -321,6 +324,9 @@ class ProgramConfig:
        self.gsearch_regex = gsearch_regex
        self.gsearch_range = gsearch_range
        self.darkmode = darkmode
        self.src_link_flag = src_link_flag
        self.src_force_download_flag = src_force_download_flag
        self.src_title_flag = src_title_flag
        self.textselect_mini_menu = textselect_mini_menu
        self.saladict_shortcut = saladict_shortcut
        self.search_url = search_url
Loading