Commit 35727b64 authored by TheTechRobo
Browse files

Make aiohttp session shared. Also move to MediaWiki API for removededm.

parent 2108b3fa
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
README.md.backup
batcher.py
*.pyc
nohup.out
.idea
+11 −6
Original line number Diff line number Diff line
@@ -12,6 +12,11 @@ app = EscapingQuart(__name__)
with open('config.yml', 'r') as file:
    config_yml = yaml.safe_load(file)

@app.before_serving
async def _make_session():
    """
    Build the FytSession and publish it as the module-level FYT_SESSION.

    Registered through ``app.before_serving``; the request handlers in this
    module call ``generate`` / ``generateStream`` on FYT_SESSION.
    """
    global FYT_SESSION
    # NOTE(review): the meaning of the positional True is defined by FytSession.new; confirm there.
    shared_session = await findyoutubevideo.FytSession.new(True)
    FYT_SESSION = shared_session

@app.route("/robots.txt")
async def robots():
    """
    Serve robots.txt out of the "static" directory.
    """
    robots_file = await send_from_directory("static", "robots.txt")
    return robots_file
@@ -21,14 +26,14 @@ async def youtubev2(id):
    """
    Provides backwards compatibility for the old endpoint.
    """
    return (await findyoutubevideo.YouTubeResponse.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"}
    return (await FYT_SESSION.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"}

async def wrapperYT(id, includeRaw):
    """
    Wrapper for generate

    Awaits ``FYT_SESSION.generate(id, includeRaw)`` and returns its result.
    If the call raises ``findyoutubevideo.types.InvalidVideoIdError``, a
    ``{"status": "bad.id", "id": None}`` dict is returned instead of letting
    the exception propagate.
    """
    try:
        # Go through the module-level shared session rather than a class-level
        # generate(), so every request reuses the same session object.
        return await FYT_SESSION.generate(id, includeRaw)
    except findyoutubevideo.types.InvalidVideoIdError:
        return {"status": "bad.id", "id": None}

@@ -36,7 +41,7 @@ async def wrapperYTS(id, includeRaw):
    """
    Wrapper for generateStream
    """
    return await findyoutubevideo.YouTubeResponse.generateStream(id, includeRaw)
    return await FYT_SESSION.generateStream(id, includeRaw)

@app.route("/api/v<int:v>/<site>/<id>")
@app.route("/api/v<int:v>/<id>")
@@ -128,7 +133,7 @@ async def load_thing():
    if not request.args.get("id"):
        return "Missing id parameter", 400
    t = await youtube(5, request.args['id'], "youtube", jsn=False)
    assert isinstance(t, findyoutubevideo.YouTubeResponse)
    assert isinstance(t, findyoutubevideo.Response)
    t.keys = list(itertools.chain(
        (k for k in t.keys if k.archived and not k.error),
        (k for k in t.keys if k.error),
@@ -202,8 +207,8 @@ async def api():
    """
    API docs
    """
    responseDocstring = findyoutubevideo.YouTubeResponse.__doc__
    serviceDocstring = findyoutubevideo.Service.__doc__
    responseDocstring = findyoutubevideo.Response.__doc__
    serviceDocstring = findyoutubevideo.BaseService.__doc__
    linkDocstring = findyoutubevideo.Link.__doc__
    # Parse the attributes list
    responseDocstring = await parse_lines(responseDocstring.split("Attributes:\n")[1].strip().split("\n"))
+2 −0
Original line number Diff line number Diff line
# Please read this configuration file thoroughly before hosting the site.

version: 3

methods:
+109 −84
Original line number Diff line number Diff line
@@ -4,10 +4,10 @@ All the Service implementations live here.

import random, time, aiohttp, asyncio
import typing_extensions as typing
from .types import Link, LinkContains, YouTubeService, methods, experiment_base_url
from .types import FytSession, Link, LinkContains, Service, methods, experiment_base_url
from yarl import URL

async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str, video_id: str, **report):
async def submit_experiment(session: FytSession, experiment_name: str, video_id: str, **report):
    if experiment_base_url:
        report |= {
            "experiment": experiment_name,
@@ -18,7 +18,7 @@ async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str
        except Exception:
            pass

class YouTube(YouTubeService):
class YouTube(Service):
    """
    Checks if the video is still available on YouTube.
    Thumbnail method has a few edge cases but seems the most reliable for all tested cases.
@@ -27,7 +27,7 @@ class YouTube(YouTubeService):
    configId = "youtube"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        lien = f"https://i.ytimg.com/vi/{id}/hqdefault.jpg"
        async with session.head(lien, allow_redirects=False, timeout=15) as response:
            code = response.status
@@ -58,12 +58,12 @@ class YouTube(YouTubeService):
        )


class WaybackMachine(YouTubeService):
class WaybackMachine(Service):
    name = methods["ia_wayback"]["title"]
    configId = "ia_wayback"

    @classmethod
    async def _run(cls, id: str, session: aiohttp.ClientSession):
    async def _run(cls, id: str, session: FytSession):
        ismeta = False
        archived = False

@@ -208,7 +208,7 @@ class WaybackMachine(YouTubeService):
        )


class ArchiveOrgDetails(YouTubeService):
class ArchiveOrgDetails(Service):
    name = methods["ia_details"]["title"]
    configId = "ia_details"
    items_tried = [
@@ -218,7 +218,7 @@ class ArchiveOrgDetails(YouTubeService):
    ]

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        responses = []
        is_dark = False
        archived = False
@@ -270,7 +270,7 @@ class ArchiveOrgDetails(YouTubeService):
        )


class ArchiveOrgCDX(YouTubeService):
class ArchiveOrgCDX(Service):
    """
    Queries the Archive.org CDX for an archived video thumb
    """
@@ -278,7 +278,7 @@ class ArchiveOrgCDX(YouTubeService):
    configId = "ia_cdx"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        cdx_urls = [
            f"https://web.archive.org/cdx/search/cdx?url=i.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json",
            f"https://web.archive.org/cdx/search/cdx?url=i1.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json",
@@ -340,12 +340,12 @@ class ArchiveOrgCDX(YouTubeService):
        )


class GhostArchive(YouTubeService):
class GhostArchive(Service):
    name = methods["ghostarchive"]["title"]
    configId = "ghostarchive"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        link = f"https://ghostarchive.org/varchive/{id}"
        async with session.get(link, timeout=5) as resp:
            code = resp.status
@@ -373,7 +373,7 @@ class GhostArchive(YouTubeService):
            metaonly=False, classname=cls.__name__
        )

class HackintYa(YouTubeService):
class HackintYa(Service):
    name = methods["hackint_ya"]["title"]
    note = ("Video retrieval is currently not available for technical reasons. "
            "Check back later for access instructions. This may take weeks or months."
@@ -381,7 +381,7 @@ class HackintYa(YouTubeService):
    configId = "hackint_ya"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        username: str = methods[cls.configId]["username"]
        password: str = methods[cls.configId]["password"]
        excluded: list[str] = methods[cls.configId].get("excluded", [])
@@ -410,12 +410,12 @@ class HackintYa(YouTubeService):
        )


class DistributedYoutubeArchive(YouTubeService):
class DistributedYoutubeArchive(Service):
    name = methods['distributed_youtube_archive']['title']
    configId = "distributed_youtube_archive"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        lastupdated = time.time()
        async with session.get(f"https://dya-t-api.strangled.net/api/video/{id}") as resp:
            status = resp.status
@@ -450,14 +450,14 @@ class DistributedYoutubeArchive(YouTubeService):
            classname=cls.__name__
        )

class Hobune(YouTubeService):
class Hobune(Service):
    name = methods["hobune_stream"]["title"]
    configId = "hobune_stream"
    lastretrieved = 0
    cooldown = 0.5

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        while time.time() - cls.lastretrieved < cls.cooldown:
            await asyncio.sleep(0.1)
        urls_to_try = ("https://hobune.stream/videos/{}", "https://hobune.stream/tpa-h/videos/{}")
@@ -486,84 +486,109 @@ class Hobune(YouTubeService):
            rawraw=raw, metaonly=False, classname=cls.__name__
        )

class removededm(YouTubeService):
class removededm(Service):
    name = methods["removededm"]["title"]
    configId = "removededm"
    endpoint = "https://removededm.com/w/api.php"

    @classmethod
    async def _run(cls, id, session: FytSession):
        """
        Check removededm for a video with one MediaWiki API query.

        All candidate page titles (metadata page, video files, thumbnails and
        single-frame images) are sent in a single ``action=query`` request.
        A Link is yielded for every candidate that exists, and the service
        result (``cls(...)``) is yielded last.

        If the API answers ``readapidenied``, ``cls.login`` is called and the
        query is retried once.

        Raises:
            RuntimeError: if the API response still contains an error.
        """
        got_video = False
        # Note: Video IDs starting with an underscore are redirected to have a period at the start due to
        #       limitations in the wiki software
        potential_image_extensions = ("jpg", "png", "webp")
        # Each entry: (candidate page titles, keyword arguments for the Link to yield on a hit)
        potential_files = (
            ([f"{id}"], dict(contains = LinkContains(metadata = True), title = "Metadata")),
            ([f"File:{id}.mp4", f"File:{id}.webm"], dict(contains = LinkContains(video = True), title = "Video")),
            ([f"File:{id}.{ext}" for ext in potential_image_extensions], dict(
                contains = LinkContains(thumbnail = True),
                title = "Thumbnail"
            )),
            ([f"File:{id}_.{ext}" for ext in potential_image_extensions], dict(
                contains = LinkContains(single_frame = True),
                title = "Frame",
                note = "This is a single frame of the video."
            )),
        )
        archived = False
        rawraw = None

        api_request = {
            "action": "query",
            "format": "json",
            "titles": "|".join("|".join(i) for i, _ in potential_files),
            "formatversion": "2",
        }
        async with session.get(cls.endpoint, params = api_request) as response:
            j = await response.json()
            if "error" in j and j['error'].get("code") == "readapidenied":
                await cls.login(session)
                async with session.get(cls.endpoint, params = api_request) as response:
                    j = await response.json()
            if "error" in j:
                raise RuntimeError("API error")

        # Titles the wiki reports as neither missing nor invalid.
        pages = {
            page['title']
            for page in j['query']['pages']
            if not page.get("missing") and not page.get("invalid")
        }
        # MediaWiki will normalize IDs with underscores, like _kVU4fHJ9JM m_yqgZV6G5c
        # The "normalized" list is only present when at least one title was
        # actually rewritten, so default to an empty list.
        for normalized_page in j['query'].get('normalized', []):
            # Keep the old ones in the set; it doesn't hurt anything, and there might be weird behaviour in certain cases
            # Pages that don't exist are still included in the list, so check for existence beforehand
            if normalized_page['to'] in pages:
                pages.add(normalized_page['from'])
        for files, args in potential_files:
            for file in files:
                if file in pages:
                    archived = True
                    # Only count a video once one of the video files really exists;
                    # otherwise metaonly below could never be True.
                    if args['contains'].video:
                        got_video = True
                    yield Link(url = f"https://removededm.com/{file}", **args)

        yield cls(
            archived=archived, rawraw=rawraw, metaonly=not got_video,
            error=None, lastupdated=time.time(), name=cls.getName(), note="", classname=cls.__name__
        )

# TODO: Make a YouTubeServiceWithCooldown or something
    @classmethod
    async def login(cls, session: FytSession):
        """
        Log the given session into removededm through the MediaWiki API.

        Requests a login token from ``cls.endpoint``, then posts it together
        with the username and password read from ``methods[cls.configId]``.

        Raises:
            RuntimeError: if the login result is anything other than "Success".
        """
        # Need to set up proper debug logging.
        print("Logging into removededm", flush = True)
        username = methods[cls.configId]['username']
        password = methods[cls.configId]['password']
        # Hold the lock obtained for this class so we don't log in multiple times at once.
        # NOTE(review): how get_lock scopes its locks is defined by FytSession; confirm there.
        async with session.get_lock(cls):
            # Step 1: ask the API for a login token.
            token_query = dict(
                action = "query",
                format = "json",
                meta = "tokens",
                type = "login",
                formatversion = "2",
            )
            async with session.get(cls.endpoint, params = token_query) as token_response:
                token_payload = await token_response.json()
                login_token = token_payload['query']['tokens']['logintoken']

            # Step 2: submit the credentials along with that token.
            login_form = dict(
                action = "login",
                format = "json",
                formatversion = "2",
                lgname = username,
                lgpassword = password,
                lgtoken = login_token,
            )
            async with session.post(cls.endpoint, data = login_form) as login_response:
                login_payload = await login_response.json()
                succeeded = login_payload['login']['result'] == "Success"
                if not succeeded:
                    raise RuntimeError("Login failure")

class Filmot(YouTubeService):
class Filmot(Service):
    name = methods["filmot"]["title"]
    lastretrieved: int = 0
    cooldown: int = 2
    configId = "filmot"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        key = methods[cls.configId]["api_key"]

        while time.time() - cls.lastretrieved < cls.cooldown:
@@ -589,7 +614,7 @@ class Filmot(YouTubeService):
                classname=cls.__name__
        )

class Playboard(YouTubeService):
class Playboard(Service):
    """
    Playboard is metadata-only as far as I know.
    """
@@ -599,7 +624,7 @@ class Playboard(YouTubeService):
    user_agent = methods["playboard_co"]["user_agent"]

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        note = cls.note
        user_agent = cls.user_agent % random.randint(0, 100)
        url = f"https://playboard.co/en/video/{id}"
@@ -628,7 +653,7 @@ class Playboard(YouTubeService):
                classname=cls.__name__
        )

class AltCensored(YouTubeService):
class AltCensored(Service):
    """
    altCensored does not store any videos. Instead, it links to archived versions.
    """
@@ -637,7 +662,7 @@ class AltCensored(YouTubeService):
    configId = "altcensored"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        url = f"https://altcensored.com/watch?v={id}"
        async with session.get(url) as resp:
            code = resp.status
@@ -659,7 +684,7 @@ class AltCensored(YouTubeService):
                rawraw=None, metaonly=False, classname=cls.__name__
        )

class Odysee(YouTubeService):
class Odysee(Service):
    """
    Queries the LBRY YouTube Sync API to find out whether the video has been mirrored to Odysee.
    """
@@ -667,7 +692,7 @@ class Odysee(YouTubeService):
    configId = "odysee"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        lastupdated = time.time()
        async with session.get(f"https://api.lbry.com/yt/resolve?video_ids={id}") as resp:
            status = resp.status
@@ -699,17 +724,17 @@ class Odysee(YouTubeService):
            classname=cls.__name__
        )

class PreserveTube(YouTubeService):
class PreserveTube(Service):
    name = methods["preservetube"]["title"]
    note = ""
    configId = "preservetube"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        url = f"https://api.preservetube.com/video/{id}"

        # keep any pre-existing headers but patch in "Accept"
        headers = session.headers.copy()
        headers = session.session.headers.copy()
        headers.update({"Accept": "application/json"})

        async with session.get(url, headers=headers) as resp:
@@ -735,13 +760,13 @@ class PreserveTube(YouTubeService):
                rawraw=None, metaonly=False, classname=cls.__name__
        )

class NyaneOnline(YouTubeService):
class NyaneOnline(Service):
    name = methods['nyaneonline']['title']
    note = ""
    configId = "nyaneonline"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        url = f"https://www.nyane.online/video"

        async with session.head(url, params={"id": id}) as resp:
@@ -765,13 +790,13 @@ class NyaneOnline(YouTubeService):
                   rawraw=None, metaonly=False, classname=cls.__name__
        )

class LetsPlayIndex(YouTubeService):
class LetsPlayIndex(Service):
    name = methods['letsplayindex']['title']
    note = ""
    configId = "letsplayindex"

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
    async def _run(cls, id, session: FytSession):
        url = f"https://www.letsplayindex.com/video/x-{id}"
        archived = False

+162 −133

File changed.

Preview size limit exceeded, changes collapsed.