Unverified Commit 722b5e90 authored by TheTechRobo's avatar TheTechRobo Committed by GitHub
Browse files

Merge pull request #107 from voski/95-user-agent-config

parents 0344b97c 8bbe2907
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -25,6 +25,25 @@ docker run --restart=unless-stopped -p 8000:8000 -e GUNICORN_WORKERS=4 thetechro
### Running outside of Docker (unsupported)
To run outside of Docker, check the Dockerfile to see what it does during the build (it's a glorified shell script) and perform the same steps yourself.

## User-Agent Configuration

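You can set a global `User-Agent` header in your `config.yml` file using the top-level `user_agent` key. It is applied by default to outgoing requests; if the key is omitted, no custom header is set and the HTTP client's default is used. An individual method may also define its own `user_agent` (as `playboard_co` does), which is used for that method's requests instead.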

### Example `config.yml`

```yaml
version: 3

# Global User-Agent
user_agent: "FindYoutubeVideo/1.0 operated by TheTechRobo"

methods:
  youtube:
    title: YouTube
    enabled: true
...
```

## Licence

Copyright (c) 2022-2024 TheTechRobo
+4 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ methods:
  playboard_co:
    title: Playboard.co
    enabled: true
    user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.%s.0.0 Safari/537.36"

  removededm:
    title: Removed.edm
@@ -59,3 +60,6 @@ methods:
  preservetube:
    title: PreserveTube
    enabled: true

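# Global User-Agent (optional): default header for outgoing requests.
# A method's own `user_agent` (see playboard_co above) takes precedence for that method.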
user_agent: "FindYoutubeVideo/1.0 operated by TheTechRobo"
+10 −11
Original line number Diff line number Diff line
@@ -259,7 +259,6 @@ class HackintYa(YouTubeService):
            note=cls.note if archived else "", rawraw=rawraw, metaonly=False, classname=cls.__name__
        )

FYT_UA = "FindYoutubeVideo/1.0 operated by TheTechRobo"

class DistributedYoutubeArchive(YouTubeService):
    """
@@ -270,7 +269,6 @@ class DistributedYoutubeArchive(YouTubeService):

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
        user_agent = FYT_UA
        lastupdated = time.time()
        async with session.get(f"https://dya-t-api.strangled.net/api/video/{id}") as resp:
            status = resp.status
@@ -313,7 +311,6 @@ class Hobune(YouTubeService):
    async def _run(cls, id, session: aiohttp.ClientSession):
        while time.time() - cls.lastretrieved < cls.cooldown:
            await asyncio.sleep(0.1)
        user_agent = "FindYoutubeVideo/1.0 operated by thetechrobo@thetechrobo.ca"
        urls_to_try = ("https://hobune.stream/videos/{}", "https://hobune.stream/tpa-h/videos/{}")
        raw = []
        archived = False
@@ -322,7 +319,7 @@ class Hobune(YouTubeService):
        cls.lastretrieved = lastupdated
        for url in urls_to_try:
            url = url.format(id)
            async with session.head(url, headers={"User-Agent": user_agent}, timeout=5) as resp:
            async with session.head(url, timeout=5) as resp:
                code = resp.status
                raw.append(code)
            if code == 200:
@@ -422,12 +419,12 @@ class Playboard(YouTubeService):
    name = methods["playboard_co"]["title"]
    note = "The Playboard scraper is unreliable; please verify values yourself."
    configId = "playboard_co"
    user_agent = methods["playboard_co"]["user_agent"]

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession) -> typing.Self:
        note = cls.note
        user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.%s.0.0 Safari/537.36"
        user_agent = user_agent % random.randint(0, 100)
        user_agent = cls.user_agent % random.randint(0, 100)
        url = f"https://playboard.co/en/video/{id}"
        async with session.get(url, headers={"User-Agent": user_agent}) as resp:
            code = resp.status
@@ -462,9 +459,8 @@ class AltCensored(YouTubeService):

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession) -> typing.Self:
        user_agent = FYT_UA
        url = f"https://altcensored.com/watch?v={id}"
        async with session.get(url, headers={"User-Agent": user_agent}) as resp:
        async with session.get(url) as resp:
            code = resp.status
        lastupdated = time.time()
        available = None
@@ -491,7 +487,6 @@ class Odysee(YouTubeService):

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession):
        user_agent = FYT_UA
        lastupdated = time.time()
        async with session.get(f"https://api.lbry.com/yt/resolve?video_ids={id}") as resp:
            status = resp.status
@@ -530,9 +525,13 @@ class PreserveTube(YouTubeService):

    @classmethod
    async def _run(cls, id, session: aiohttp.ClientSession) -> typing.Self:
        user_agent = FYT_UA
        url = f"https://api.preservetube.com/video/{id}"
        async with session.get(url, headers={"User-Agent": user_agent, "Accept": "application/json"}) as resp:

        # keep any pre-existing headers but patch in "Accept"
        headers = session.headers.copy()
        headers.update({"Accept": "application/json"})

        async with session.get(url, headers=headers) as resp:
            json = await resp.json()
        lastupdated = time.time()
        available = None
+5 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ from snscrape.base import _JSONDataclass as JSONDataclass
with open('config.yml', 'r') as file:
    config_yml = yaml.safe_load(file)
    methods = config_yml["methods"]
    user_agent = config_yml.get("user_agent") # defaults to None if not set

@dataclasses.dataclass
class Service(JSONDataclass):
@@ -294,7 +295,10 @@ class YouTubeResponse(JSONDataclass):
        keys = []
        services = cls._get_services()
        coroutines = []
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20)) as session:
        headers = {}
        if user_agent:
            headers["User-Agent"] = user_agent
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20), headers=headers) as session:
            svcs = {}
            for service in services:
                svcs[service.__name__] = service.getName()