Commit 5bae0f0c authored by TheTechRobo's avatar TheTechRobo
Browse files

Better handling of videoinfo, new experiment

Only query fakeurl when videoinfo doesn't find a capture.
If fakeurl works, send a report.
parent 92e0c825
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
=== Current Experiments ===

- Wayback Machine: If a video is available on the fakeurl endpoint but not on the videoinfo endpoint, a report is sent to my server. There seem to be intermittent periods where videoinfo doesn't return a video, but by the time I check, it is fixed. I'd like to see what these API responses look like so I can try to detect them. The video ID, the fakeurl and videoinfo results, and the raw videoinfo response are submitted.

=== Previous Experiments ===

- Wayback Machine: Checked two supposedly equivalent API endpoints (fakeurl and videoinfo). If they didn't match, a report was sent to my server. This was prompted by a video that is not recognized on the fakeurl endpoint but is recognized on the videoinfo endpoint. The video ID, fakeurl result, and videoinfo result were included in the report.
	(Conclusion: fakeurl is more susceptible to consistent false negatives. videoinfo is more susceptible to temporary false negatives.)
+41 −24
Original line number Diff line number Diff line
@@ -65,21 +65,7 @@ class WaybackMachine(YouTubeService):
    @classmethod
    async def _run(cls, id: str, session: aiohttp.ClientSession):
        ismeta = False
        lien = f"https://web.archive.org/web/0id_/http://wayback-fakeurl.archive.org/yt/{id}"

        async with session.head(lien, allow_redirects=False, timeout=15) as response:
            redirect = response.headers.get("location")
            archived = bool(redirect)  # Archived if there is a redirect
            if redirect:
                u = URL(redirect)
                assert u.path != "/sry", "Redirected to sorry page. Is IA down?"
            fakeurl_archived = archived
            if archived:
                yield Link(
                    url = lien,
                    contains = LinkContains(video = True),
                    title = "Video"
                )
        archived = False

        params = {"vtype": "youtube", "vid": id}
        async with session.get("https://web.archive.org/__wb/videoinfo", params=params, timeout=5) as response:
@@ -87,21 +73,52 @@ class WaybackMachine(YouTubeService):
            videoinfo_archived = bool(viresp.get("formats"))
            if videoinfo_archived:
                archived = True
        if fakeurl_archived != videoinfo_archived:
            await submit_experiment(session, "wb-index-weirdness", id, fakeurl=fakeurl_archived, videoinfo=videoinfo_archived)
            if videoinfo_archived:
                # TODO: better sorting system; right now while this is
                # an edge case I'm not going to bother, but if it ever is the default
                # this should be improved
                formats = viresp['formats']
                for format in formats:
                    url, ts = format['url'], format['timestamp']
                    lien = f"https://web.archive.org/web/{ts}/{url}"
                    mimetype = format['mimetype']
                    m_type, m_format = mimetype.split("/", 1)
                    if m_type == "video":
                        title = f"Video ({m_format})"
                        contains = LinkContains(
                            video = True,
                            standalone_video = True
                        )
                    elif m_type == "audio":
                        title = f"Audio ({m_format})"
                        contains = LinkContains(
                            standalone_audio = True
                        )
                    else:
                        title = mimetype
                        contains = LinkContains(
                            video = True,
                            standalone_video = True,
                            standalone_audio = True
                        )
                    yield Link(
                        url = lien,
                        contains = LinkContains(video = True),
                        title = "Video"
                        contains = contains,
                        title = title,
                    )

        if not archived:
            lien = f"https://web.archive.org/web/0id_/http://wayback-fakeurl.archive.org/yt/{id}"
            async with session.head(lien, allow_redirects=False, timeout=15) as response:
                redirect = response.headers.get("location")
                archived = bool(redirect)
                if redirect:
                    assert URL(redirect) != "/sry", "Redirected to sorry page. Is IA down?"
                fakeurl_archived = archived
                if fakeurl_archived:
                    yield Link(
                        url = lien,
                        contains = LinkContains(video = True, standalone_video = True),
                        title = "Video",
                        note = "A backup endpoint was used. More formats may be available later.",
                    )
                    await submit_experiment(session, "wb-vi-failures", id, fakeurl=fakeurl_archived, videoinfo=videoinfo_archived, viresp=viresp)

        response2 = None
        url_formats = [
@@ -152,7 +169,7 @@ class WaybackMachine(YouTubeService):
                        lien = response2["archived_snapshots"]["closest"]["url"]
                        break

        rawraw = (redirect, viresp, response2)
        rawraw = (None, viresp, response2)
        yield cls(
            archived=archived, rawraw=rawraw,
            lastupdated=time.time(), name=cls.getName(), note="", metaonly=ismeta,
+5 −0
Original line number Diff line number Diff line
@@ -232,6 +232,11 @@ class LinkContains(JSONDataclass):
    thumbnail: bool = False
    captions: bool = False

    standalone_video: bool = False
    """Just the video, no audio."""
    standalone_audio: bool = False
    """Just the audio, no video."""

    single_frame: bool = False
    """A single frame from the video."""

+0 −1
Original line number Diff line number Diff line
@@ -239,7 +239,6 @@ function finish(vid1) {
                        break;
                    }
                    case possible_states.Verdict: {
                        console.log(`NumArchived=${numArchived}`);
                        if (numArchived <= 0) {
                            if (dd !== null) {
                                dd.setAttribute("open", "true");