Loading .gitignore +2 −1 Original line number Diff line number Diff line README.md.backup batcher.py *.pyc nohup.out .idea Loading app.py +11 −6 Original line number Diff line number Diff line Loading @@ -12,6 +12,11 @@ app = EscapingQuart(__name__) with open('config.yml', 'r') as file: config_yml = yaml.safe_load(file) @app.before_serving async def _make_session(): global FYT_SESSION FYT_SESSION = await findyoutubevideo.FytSession.new(True) @app.route("/robots.txt") async def robots(): return await send_from_directory("static", "robots.txt") Loading @@ -21,14 +26,14 @@ async def youtubev2(id): """ Provides backwards compatibility for the old endpoint. """ return (await findyoutubevideo.YouTubeResponse.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"} return (await FYT_SESSION.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"} async def wrapperYT(id, includeRaw): """ Wrapper for generate """ try: return await findyoutubevideo.YouTubeResponse.generate(id, includeRaw) return await FYT_SESSION.generate(id, includeRaw) except findyoutubevideo.types.InvalidVideoIdError: return {"status": "bad.id", "id": None} Loading @@ -36,7 +41,7 @@ async def wrapperYTS(id, includeRaw): """ Wrapper for generateStream """ return await findyoutubevideo.YouTubeResponse.generateStream(id, includeRaw) return await FYT_SESSION.generateStream(id, includeRaw) @app.route("/api/v<int:v>/<site>/<id>") @app.route("/api/v<int:v>/<id>") Loading Loading @@ -128,7 +133,7 @@ async def load_thing(): if not request.args.get("id"): return "Missing id parameter", 400 t = await youtube(5, request.args['id'], "youtube", jsn=False) assert isinstance(t, findyoutubevideo.YouTubeResponse) assert isinstance(t, findyoutubevideo.Response) t.keys = list(itertools.chain( (k for k in t.keys if k.archived and not k.error), (k for k in t.keys if k.error), Loading Loading @@ -202,8 +207,8 @@ async def api(): """ API docs """ responseDocstring = findyoutubevideo.YouTubeResponse.__doc__ serviceDocstring = findyoutubevideo.Service.__doc__ responseDocstring = findyoutubevideo.Response.__doc__ serviceDocstring = findyoutubevideo.BaseService.__doc__ linkDocstring = findyoutubevideo.Link.__doc__ # Parse the attributes list responseDocstring = await parse_lines(responseDocstring.split("Attributes:\n")[1].strip().split("\n")) Loading config.template.yml +2 −0 Original line number Diff line number Diff line # Please read this configuration file thoroughly before hosting the site. version: 3 methods: Loading findyoutubevideo/finder.py +109 −84 Original line number Diff line number Diff line Loading @@ -4,10 +4,10 @@ All the Service implementations live here. import random, time, aiohttp, asyncio import typing_extensions as typing from .types import Link, LinkContains, YouTubeService, methods, experiment_base_url from .types import FytSession, Link, LinkContains, Service, methods, experiment_base_url from yarl import URL async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str, video_id: str, **report): async def submit_experiment(session: FytSession, experiment_name: str, video_id: str, **report): if experiment_base_url: report |= { "experiment": experiment_name, Loading @@ -18,7 +18,7 @@ async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str except Exception: pass class YouTube(YouTubeService): class YouTube(Service): """ Checks if the video is still available on YouTube. Thumbnail method has a few edge cases but seems the most reliable for all tested cases. Loading @@ -27,7 +27,7 @@ class YouTube(YouTubeService): configId = "youtube" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lien = f"https://i.ytimg.com/vi/{id}/hqdefault.jpg" async with session.head(lien, allow_redirects=False, timeout=15) as response: code = response.status Loading Loading @@ -58,12 +58,12 @@ class YouTube(YouTubeService): ) class WaybackMachine(YouTubeService): class WaybackMachine(Service): name = methods["ia_wayback"]["title"] configId = "ia_wayback" @classmethod async def _run(cls, id: str, session: aiohttp.ClientSession): async def _run(cls, id: str, session: FytSession): ismeta = False archived = False Loading Loading @@ -208,7 +208,7 @@ class WaybackMachine(YouTubeService): ) class ArchiveOrgDetails(YouTubeService): class ArchiveOrgDetails(Service): name = methods["ia_details"]["title"] configId = "ia_details" items_tried = [ Loading @@ -218,7 +218,7 @@ class ArchiveOrgDetails(YouTubeService): ] @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): responses = [] is_dark = False archived = False Loading Loading @@ -270,7 +270,7 @@ class ArchiveOrgDetails(YouTubeService): ) class ArchiveOrgCDX(YouTubeService): class ArchiveOrgCDX(Service): """ Queries the Archive.org CDX for an archived video thumb """ Loading @@ -278,7 +278,7 @@ class ArchiveOrgCDX(YouTubeService): configId = "ia_cdx" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): cdx_urls = [ f"https://web.archive.org/cdx/search/cdx?url=i.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json", f"https://web.archive.org/cdx/search/cdx?url=i1.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json", Loading Loading @@ -340,12 +340,12 @@ class ArchiveOrgCDX(YouTubeService): ) class GhostArchive(YouTubeService): class GhostArchive(Service): name = methods["ghostarchive"]["title"] configId = "ghostarchive" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): link = f"https://ghostarchive.org/varchive/{id}" async with session.get(link, timeout=5) as resp: code = resp.status Loading Loading @@ -373,7 +373,7 @@ class GhostArchive(YouTubeService): metaonly=False, classname=cls.__name__ ) class HackintYa(YouTubeService): class HackintYa(Service): name = methods["hackint_ya"]["title"] note = ("Video retrieval is currently not available for technical reasons. " "Check back later for access instructions. This may take weeks or months." Loading @@ -381,7 +381,7 @@ class HackintYa(YouTubeService): configId = "hackint_ya" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): username: str = methods[cls.configId]["username"] password: str = methods[cls.configId]["password"] excluded: list[str] = methods[cls.configId].get("excluded", []) Loading Loading @@ -410,12 +410,12 @@ class HackintYa(YouTubeService): ) class DistributedYoutubeArchive(YouTubeService): class DistributedYoutubeArchive(Service): name = methods['distributed_youtube_archive']['title'] configId = "distributed_youtube_archive" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lastupdated = time.time() async with session.get(f"https://dya-t-api.strangled.net/api/video/{id}") as resp: status = resp.status Loading Loading @@ -450,14 +450,14 @@ class DistributedYoutubeArchive(YouTubeService): classname=cls.__name__ ) class Hobune(YouTubeService): class Hobune(Service): name = methods["hobune_stream"]["title"] configId = "hobune_stream" lastretrieved = 0 cooldown = 0.5 @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): while time.time() - cls.lastretrieved < cls.cooldown: await asyncio.sleep(0.1) urls_to_try = ("https://hobune.stream/videos/{}", "https://hobune.stream/tpa-h/videos/{}") Loading Loading @@ -486,84 +486,109 @@ class Hobune(YouTubeService): rawraw=raw, metaonly=False, classname=cls.__name__ ) class removededm(YouTubeService): class removededm(Service): name = methods["removededm"]["title"] configId = "removededm" endpoint = "https://removededm.com/w/api.php" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): got_video = False # Note: Video IDs starting with an underscore are redirected to have a period at the start due to # limitations in the wiki software potential_video_links = (f"https://removededm.com/File:{id}.mp4", f"https://removededm.com/File:{id}.webm") potential_image_extensions = ("jpg", "png", "webp") potential_files = ( ([f"{id}"], dict(contains = LinkContains(metadata = True), title = "Metadata")), ([f"File:{id}.mp4", f"File:{id}.webm"], dict(contains = LinkContains(video = True), title = "Video")), ([f"File:{id}.{ext}" for ext in potential_image_extensions], dict( contains = LinkContains(thumbnail = True), title = "Thumbnail" )), ([f"File:{id}_.{ext}" for ext in potential_image_extensions], dict( contains = LinkContains(single_frame = True), title = "Frame", note = "This is a single frame of the video." )), ) archived = False rawraw = None link = f"https://removededm.com/{id}" async with session.head(link, timeout=15, allow_redirects=True) as response: archived = response.status == 200 if archived: yield Link( url = link, contains = LinkContains(metadata = True), title = "Metadata" ) rawraw = response.status for lnk in potential_video_links: async with session.head(lnk, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 rawraw = response.status if is_archived: api_request = { "action": "query", "format": "json", "titles": "|".join("|".join(i) for i, _ in potential_files), "formatversion": "2", } async with session.get(cls.endpoint, params = api_request) as response: j = await response.json() if "error" in j and j['error'].get("code") == "readapidenied": await cls.login(session) async with session.get(cls.endpoint, params = api_request) as response: j = await response.json() if "error" in j: raise RuntimeError("API error") pages = set(page['title'] for page in j['query']['pages'] if not page.get("missing")) # MediaWiki will normalize IDs with underscores, like _kVU4fHJ9JM m_yqgZV6G5c for normalized_page in j['query']['normalized']: # Keep the old ones in the set; it doesn't hurt anything, and there might be weird behaviour in certain cases # Pages that don't exist are still included in the list, so check for existence beforehand if normalized_page['to'] in pages: pages.add(normalized_page['from']) for files, args in potential_files: if args['contains'].video: got_video = True for file in files: if file in pages: archived = True yield Link( url = lnk, contains = LinkContains(video = True), title = "Video" ) for extension in potential_image_extensions: lnk = f"https://removededm.com/File:{id}.{extension}" async with session.head(lnk, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 if is_archived: archived = True yield Link( url = lnk, contains = LinkContains(thumbnail = True), title = "Thumbnail" ) # Sometimes, if the video itself isn't available, but they have a frame from it, # it'll be available here. frame_link = f"https://removededm.com/File:{id}_.{extension}" async with session.head(frame_link, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 if is_archived: archived = True yield Link( url = frame_link, contains = LinkContains(single_frame = True), title = "Frame", note = "This is a single frame of the video.", ) yield Link(url = f"https://removededm.com/{file}", **args) yield cls( archived=archived, rawraw=rawraw, metaonly=not got_video, error=None, lastupdated=time.time(), name=cls.getName(), note="", classname=cls.__name__ ) # TODO: Make a YouTubeServiceWithCooldown or something @classmethod async def login(cls, session: FytSession): # Need to set up proper debug logging. print("Logging into removededm", flush = True) username = methods[cls.configId]['username'] password = methods[cls.configId]['password'] # Get a lockso we don't log in multiple times at once async with session.get_lock(cls): # What's wrong with just including an API key in every request? :( token_request_params = { "action": "query", "format": "json", "meta": "tokens", "type": "login", "formatversion": "2", } async with session.get(cls.endpoint, params = token_request_params) as response: j = await response.json() token = j['query']['tokens']['logintoken'] login_request_params = { "action": "login", "format": "json", "formatversion": "2", "lgname": username, "lgpassword": password, "lgtoken": token, } async with session.post(cls.endpoint, data = login_request_params) as response: j = await response.json() if j['login']['result'] != "Success": raise RuntimeError("Login failure") class Filmot(YouTubeService): class Filmot(Service): name = methods["filmot"]["title"] lastretrieved: int = 0 cooldown: int = 2 configId = "filmot" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): key = methods[cls.configId]["api_key"] while time.time() - cls.lastretrieved < cls.cooldown: Loading @@ -589,7 +614,7 @@ class Filmot(YouTubeService): classname=cls.__name__ ) class Playboard(YouTubeService): class Playboard(Service): """ Playboard is metadata-only as far as I know. """ Loading @@ -599,7 +624,7 @@ class Playboard(YouTubeService): user_agent = methods["playboard_co"]["user_agent"] @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): note = cls.note user_agent = cls.user_agent % random.randint(0, 100) url = f"https://playboard.co/en/video/{id}" Loading Loading @@ -628,7 +653,7 @@ class Playboard(YouTubeService): classname=cls.__name__ ) class AltCensored(YouTubeService): class AltCensored(Service): """ altCensored does not store any videos. Instead, it links to archived versions. """ Loading @@ -637,7 +662,7 @@ class AltCensored(YouTubeService): configId = "altcensored" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://altcensored.com/watch?v={id}" async with session.get(url) as resp: code = resp.status Loading @@ -659,7 +684,7 @@ class AltCensored(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class Odysee(YouTubeService): class Odysee(Service): """ Queries the LBRY YouTube Sync API to find out whether the video has been mirrored to Odysee. """ Loading @@ -667,7 +692,7 @@ class Odysee(YouTubeService): configId = "odysee" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lastupdated = time.time() async with session.get(f"https://api.lbry.com/yt/resolve?video_ids={id}") as resp: status = resp.status Loading Loading @@ -699,17 +724,17 @@ class Odysee(YouTubeService): classname=cls.__name__ ) class PreserveTube(YouTubeService): class PreserveTube(Service): name = methods["preservetube"]["title"] note = "" configId = "preservetube" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://api.preservetube.com/video/{id}" # keep any pre-existing headers but patch in "Accept" headers = session.headers.copy() headers = session.session.headers.copy() headers.update({"Accept": "application/json"}) async with session.get(url, headers=headers) as resp: Loading @@ -735,13 +760,13 @@ class PreserveTube(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class NyaneOnline(YouTubeService): class NyaneOnline(Service): name = methods['nyaneonline']['title'] note = "" configId = "nyaneonline" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://www.nyane.online/video" async with session.head(url, params={"id": id}) as resp: Loading @@ -765,13 +790,13 @@ class NyaneOnline(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class LetsPlayIndex(YouTubeService): class LetsPlayIndex(Service): name = methods['letsplayindex']['title'] note = "" configId = "letsplayindex" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://www.letsplayindex.com/video/x-{id}" archived = False Loading findyoutubevideo/types.py +162 −133 File changed.Preview size limit exceeded, changes collapsed. Show changes Loading
.gitignore +2 −1 Original line number Diff line number Diff line README.md.backup batcher.py *.pyc nohup.out .idea Loading
app.py +11 −6 Original line number Diff line number Diff line Loading @@ -12,6 +12,11 @@ app = EscapingQuart(__name__) with open('config.yml', 'r') as file: config_yml = yaml.safe_load(file) @app.before_serving async def _make_session(): global FYT_SESSION FYT_SESSION = await findyoutubevideo.FytSession.new(True) @app.route("/robots.txt") async def robots(): return await send_from_directory("static", "robots.txt") Loading @@ -21,14 +26,14 @@ async def youtubev2(id): """ Provides backwards compatibility for the old endpoint. """ return (await findyoutubevideo.YouTubeResponse.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"} return (await FYT_SESSION.generate(id)).coerce_to_api_version(2).json(), {"Content-Type": "application/json"} async def wrapperYT(id, includeRaw): """ Wrapper for generate """ try: return await findyoutubevideo.YouTubeResponse.generate(id, includeRaw) return await FYT_SESSION.generate(id, includeRaw) except findyoutubevideo.types.InvalidVideoIdError: return {"status": "bad.id", "id": None} Loading @@ -36,7 +41,7 @@ async def wrapperYTS(id, includeRaw): """ Wrapper for generateStream """ return await findyoutubevideo.YouTubeResponse.generateStream(id, includeRaw) return await FYT_SESSION.generateStream(id, includeRaw) @app.route("/api/v<int:v>/<site>/<id>") @app.route("/api/v<int:v>/<id>") Loading Loading @@ -128,7 +133,7 @@ async def load_thing(): if not request.args.get("id"): return "Missing id parameter", 400 t = await youtube(5, request.args['id'], "youtube", jsn=False) assert isinstance(t, findyoutubevideo.YouTubeResponse) assert isinstance(t, findyoutubevideo.Response) t.keys = list(itertools.chain( (k for k in t.keys if k.archived and not k.error), (k for k in t.keys if k.error), Loading Loading @@ -202,8 +207,8 @@ async def api(): """ API docs """ responseDocstring = findyoutubevideo.YouTubeResponse.__doc__ serviceDocstring = findyoutubevideo.Service.__doc__ responseDocstring = findyoutubevideo.Response.__doc__ serviceDocstring = findyoutubevideo.BaseService.__doc__ linkDocstring = findyoutubevideo.Link.__doc__ # Parse the attributes list responseDocstring = await parse_lines(responseDocstring.split("Attributes:\n")[1].strip().split("\n")) Loading
config.template.yml +2 −0 Original line number Diff line number Diff line # Please read this configuration file thoroughly before hosting the site. version: 3 methods: Loading
findyoutubevideo/finder.py +109 −84 Original line number Diff line number Diff line Loading @@ -4,10 +4,10 @@ All the Service implementations live here. import random, time, aiohttp, asyncio import typing_extensions as typing from .types import Link, LinkContains, YouTubeService, methods, experiment_base_url from .types import FytSession, Link, LinkContains, Service, methods, experiment_base_url from yarl import URL async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str, video_id: str, **report): async def submit_experiment(session: FytSession, experiment_name: str, video_id: str, **report): if experiment_base_url: report |= { "experiment": experiment_name, Loading @@ -18,7 +18,7 @@ async def submit_experiment(session: aiohttp.ClientSession, experiment_name: str except Exception: pass class YouTube(YouTubeService): class YouTube(Service): """ Checks if the video is still available on YouTube. Thumbnail method has a few edge cases but seems the most reliable for all tested cases. Loading @@ -27,7 +27,7 @@ class YouTube(YouTubeService): configId = "youtube" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lien = f"https://i.ytimg.com/vi/{id}/hqdefault.jpg" async with session.head(lien, allow_redirects=False, timeout=15) as response: code = response.status Loading Loading @@ -58,12 +58,12 @@ class YouTube(YouTubeService): ) class WaybackMachine(YouTubeService): class WaybackMachine(Service): name = methods["ia_wayback"]["title"] configId = "ia_wayback" @classmethod async def _run(cls, id: str, session: aiohttp.ClientSession): async def _run(cls, id: str, session: FytSession): ismeta = False archived = False Loading Loading @@ -208,7 +208,7 @@ class WaybackMachine(YouTubeService): ) class ArchiveOrgDetails(YouTubeService): class ArchiveOrgDetails(Service): name = methods["ia_details"]["title"] configId = "ia_details" items_tried = [ Loading @@ -218,7 +218,7 @@ class ArchiveOrgDetails(YouTubeService): ] @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): responses = [] is_dark = False archived = False Loading Loading @@ -270,7 +270,7 @@ class ArchiveOrgDetails(YouTubeService): ) class ArchiveOrgCDX(YouTubeService): class ArchiveOrgCDX(Service): """ Queries the Archive.org CDX for an archived video thumb """ Loading @@ -278,7 +278,7 @@ class ArchiveOrgCDX(YouTubeService): configId = "ia_cdx" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): cdx_urls = [ f"https://web.archive.org/cdx/search/cdx?url=i.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json", f"https://web.archive.org/cdx/search/cdx?url=i1.ytimg.com/vi/{id}*&collapse=digest&filter=statuscode:200&mimetype:image/jpeg&output=json", Loading Loading @@ -340,12 +340,12 @@ class ArchiveOrgCDX(YouTubeService): ) class GhostArchive(YouTubeService): class GhostArchive(Service): name = methods["ghostarchive"]["title"] configId = "ghostarchive" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): link = f"https://ghostarchive.org/varchive/{id}" async with session.get(link, timeout=5) as resp: code = resp.status Loading Loading @@ -373,7 +373,7 @@ class GhostArchive(YouTubeService): metaonly=False, classname=cls.__name__ ) class HackintYa(YouTubeService): class HackintYa(Service): name = methods["hackint_ya"]["title"] note = ("Video retrieval is currently not available for technical reasons. " "Check back later for access instructions. This may take weeks or months." Loading @@ -381,7 +381,7 @@ class HackintYa(YouTubeService): configId = "hackint_ya" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): username: str = methods[cls.configId]["username"] password: str = methods[cls.configId]["password"] excluded: list[str] = methods[cls.configId].get("excluded", []) Loading Loading @@ -410,12 +410,12 @@ class HackintYa(YouTubeService): ) class DistributedYoutubeArchive(YouTubeService): class DistributedYoutubeArchive(Service): name = methods['distributed_youtube_archive']['title'] configId = "distributed_youtube_archive" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lastupdated = time.time() async with session.get(f"https://dya-t-api.strangled.net/api/video/{id}") as resp: status = resp.status Loading Loading @@ -450,14 +450,14 @@ class DistributedYoutubeArchive(YouTubeService): classname=cls.__name__ ) class Hobune(YouTubeService): class Hobune(Service): name = methods["hobune_stream"]["title"] configId = "hobune_stream" lastretrieved = 0 cooldown = 0.5 @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): while time.time() - cls.lastretrieved < cls.cooldown: await asyncio.sleep(0.1) urls_to_try = ("https://hobune.stream/videos/{}", "https://hobune.stream/tpa-h/videos/{}") Loading Loading @@ -486,84 +486,109 @@ class Hobune(YouTubeService): rawraw=raw, metaonly=False, classname=cls.__name__ ) class removededm(YouTubeService): class removededm(Service): name = methods["removededm"]["title"] configId = "removededm" endpoint = "https://removededm.com/w/api.php" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): got_video = False # Note: Video IDs starting with an underscore are redirected to have a period at the start due to # limitations in the wiki software potential_video_links = (f"https://removededm.com/File:{id}.mp4", f"https://removededm.com/File:{id}.webm") potential_image_extensions = ("jpg", "png", "webp") potential_files = ( ([f"{id}"], dict(contains = LinkContains(metadata = True), title = "Metadata")), ([f"File:{id}.mp4", f"File:{id}.webm"], dict(contains = LinkContains(video = True), title = "Video")), ([f"File:{id}.{ext}" for ext in potential_image_extensions], dict( contains = LinkContains(thumbnail = True), title = "Thumbnail" )), ([f"File:{id}_.{ext}" for ext in potential_image_extensions], dict( contains = LinkContains(single_frame = True), title = "Frame", note = "This is a single frame of the video." )), ) archived = False rawraw = None link = f"https://removededm.com/{id}" async with session.head(link, timeout=15, allow_redirects=True) as response: archived = response.status == 200 if archived: yield Link( url = link, contains = LinkContains(metadata = True), title = "Metadata" ) rawraw = response.status for lnk in potential_video_links: async with session.head(lnk, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 rawraw = response.status if is_archived: api_request = { "action": "query", "format": "json", "titles": "|".join("|".join(i) for i, _ in potential_files), "formatversion": "2", } async with session.get(cls.endpoint, params = api_request) as response: j = await response.json() if "error" in j and j['error'].get("code") == "readapidenied": await cls.login(session) async with session.get(cls.endpoint, params = api_request) as response: j = await response.json() if "error" in j: raise RuntimeError("API error") pages = set(page['title'] for page in j['query']['pages'] if not page.get("missing")) # MediaWiki will normalize IDs with underscores, like _kVU4fHJ9JM m_yqgZV6G5c for normalized_page in j['query']['normalized']: # Keep the old ones in the set; it doesn't hurt anything, and there might be weird behaviour in certain cases # Pages that don't exist are still included in the list, so check for existence beforehand if normalized_page['to'] in pages: pages.add(normalized_page['from']) for files, args in potential_files: if args['contains'].video: got_video = True for file in files: if file in pages: archived = True yield Link( url = lnk, contains = LinkContains(video = True), title = "Video" ) for extension in potential_image_extensions: lnk = f"https://removededm.com/File:{id}.{extension}" async with session.head(lnk, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 if is_archived: archived = True yield Link( url = lnk, contains = LinkContains(thumbnail = True), title = "Thumbnail" ) # Sometimes, if the video itself isn't available, but they have a frame from it, # it'll be available here. frame_link = f"https://removededm.com/File:{id}_.{extension}" async with session.head(frame_link, timeout=15, allow_redirects=True) as response: is_archived = response.status == 200 if is_archived: archived = True yield Link( url = frame_link, contains = LinkContains(single_frame = True), title = "Frame", note = "This is a single frame of the video.", ) yield Link(url = f"https://removededm.com/{file}", **args) yield cls( archived=archived, rawraw=rawraw, metaonly=not got_video, error=None, lastupdated=time.time(), name=cls.getName(), note="", classname=cls.__name__ ) # TODO: Make a YouTubeServiceWithCooldown or something @classmethod async def login(cls, session: FytSession): # Need to set up proper debug logging. print("Logging into removededm", flush = True) username = methods[cls.configId]['username'] password = methods[cls.configId]['password'] # Get a lockso we don't log in multiple times at once async with session.get_lock(cls): # What's wrong with just including an API key in every request? :( token_request_params = { "action": "query", "format": "json", "meta": "tokens", "type": "login", "formatversion": "2", } async with session.get(cls.endpoint, params = token_request_params) as response: j = await response.json() token = j['query']['tokens']['logintoken'] login_request_params = { "action": "login", "format": "json", "formatversion": "2", "lgname": username, "lgpassword": password, "lgtoken": token, } async with session.post(cls.endpoint, data = login_request_params) as response: j = await response.json() if j['login']['result'] != "Success": raise RuntimeError("Login failure") class Filmot(YouTubeService): class Filmot(Service): name = methods["filmot"]["title"] lastretrieved: int = 0 cooldown: int = 2 configId = "filmot" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): key = methods[cls.configId]["api_key"] while time.time() - cls.lastretrieved < cls.cooldown: Loading @@ -589,7 +614,7 @@ class Filmot(YouTubeService): classname=cls.__name__ ) class Playboard(YouTubeService): class Playboard(Service): """ Playboard is metadata-only as far as I know. """ Loading @@ -599,7 +624,7 @@ class Playboard(YouTubeService): user_agent = methods["playboard_co"]["user_agent"] @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): note = cls.note user_agent = cls.user_agent % random.randint(0, 100) url = f"https://playboard.co/en/video/{id}" Loading Loading @@ -628,7 +653,7 @@ class Playboard(YouTubeService): classname=cls.__name__ ) class AltCensored(YouTubeService): class AltCensored(Service): """ altCensored does not store any videos. Instead, it links to archived versions. """ Loading @@ -637,7 +662,7 @@ class AltCensored(YouTubeService): configId = "altcensored" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://altcensored.com/watch?v={id}" async with session.get(url) as resp: code = resp.status Loading @@ -659,7 +684,7 @@ class AltCensored(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class Odysee(YouTubeService): class Odysee(Service): """ Queries the LBRY YouTube Sync API to find out whether the video has been mirrored to Odysee. """ Loading @@ -667,7 +692,7 @@ class Odysee(YouTubeService): configId = "odysee" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): lastupdated = time.time() async with session.get(f"https://api.lbry.com/yt/resolve?video_ids={id}") as resp: status = resp.status Loading Loading @@ -699,17 +724,17 @@ class Odysee(YouTubeService): classname=cls.__name__ ) class PreserveTube(YouTubeService): class PreserveTube(Service): name = methods["preservetube"]["title"] note = "" configId = "preservetube" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://api.preservetube.com/video/{id}" # keep any pre-existing headers but patch in "Accept" headers = session.headers.copy() headers = session.session.headers.copy() headers.update({"Accept": "application/json"}) async with session.get(url, headers=headers) as resp: Loading @@ -735,13 +760,13 @@ class PreserveTube(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class NyaneOnline(YouTubeService): class NyaneOnline(Service): name = methods['nyaneonline']['title'] note = "" configId = "nyaneonline" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://www.nyane.online/video" async with session.head(url, params={"id": id}) as resp: Loading @@ -765,13 +790,13 @@ class NyaneOnline(YouTubeService): rawraw=None, metaonly=False, classname=cls.__name__ ) class LetsPlayIndex(YouTubeService): class LetsPlayIndex(Service): name = methods['letsplayindex']['title'] note = "" configId = "letsplayindex" @classmethod async def _run(cls, id, session: aiohttp.ClientSession): async def _run(cls, id, session: FytSession): url = f"https://www.letsplayindex.com/video/x-{id}" archived = False Loading
findyoutubevideo/types.py +162 −133 File changed.Preview size limit exceeded, changes collapsed. Show changes