Merge 117300e1bd into 4429fd0450

3 days ago · 774f00daf4
parent 4429fd0450 117300e1bd
commit 774f00daf4
2 changed files with 576 additions and 226 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -132,17 +132,20 @@ class TestInfoExtractor(unittest.TestCase):
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
    def test_search_json_ld_realworld(self):
-        _TESTS = [
+        _TESTS = [(
            # https://github.com/ytdl-org/youtube-dl/issues/23306
-            (
+            r'''
-                r'''<script type="application/ld+json">
+            <script type="application/ld+json">
            {
                "@context": "http://schema.org/",
                "@type": "VideoObject",
                "name": "1 On 1 With Kleio",
                "url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
                "duration": "PT0H12M23S",
-"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
+                "thumbnailUrl": [
                    "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
                    "https://imggen.eporner.com/780814/1920/1080/9.jpg"
                ],
                "contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
                "embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
                "image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
@ -155,104 +158,140 @@ class TestInfoExtractor(unittest.TestCase):
                "uploadDate": "2015-12-05T21:24:35+01:00",
                "interactionStatistic": {
                    "@type": "InteractionCounter",
-"interactionType": { "@type": "http://schema.org/WatchAction" },
+                    "interactionType": {
                        "@type": "http://schema.org/WatchAction"
                    },
                    "userInteractionCount": 1120958
-}, "aggregateRating": {
+                },
                "aggregateRating": {
                    "@type": "AggregateRating",
                    "ratingValue": "88",
                    "ratingCount": "630",
                    "bestRating": "100",
                    "worstRating": "0"
-}, "actor": [{
+                },
                "actor": [{
                    "@type": "Person",
                    "name": "Kleio Valentien",
                    "url": "https://www.eporner.com/pornstar/kleio-valentien/"
-}]}
+                }]
-                </script>''',
+            }
-                {
+            </script>
            ''', {
                'ext': 'mp4',
                'title': '1 On 1 With Kleio',
                'age_limit': 18,
                'artists': ['1 On 1 With Kleio'],
                'average_rating': 88,
                'description': 'Kleio Valentien',
-                    'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+                'duration': 743,
                'height': 1080,
                'thumbnails': [
                    {'url': 'https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg'},
                    {'url': 'https://imggen.eporner.com/780814/1920/1080/9.jpg'},
                ],
                'timestamp': 1449347075,
-                    'duration': 743.0,
+                'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
                'view_count': 1120958,
                'width': 1920,
-                    'height': 1080,
+            }, {},
-                },
+        ), (
-                {},
+            # https://github.com/yt-dlp/yt-dlp/pull/1983
-            ),
+            r'''
-            (
+            <script type="application/ld+json">
                r'''<script type="application/ld+json">
            {
                "@context": "https://schema.org",
-      "@graph": [
+                "@graph": [{
      {
                    "@type": "NewsArticle",
-      "mainEntityOfPage": {
+                    "name": "Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia",
-      "@type": "WebPage",
+                    "headline": "Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia",
-      "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn"
+                    "description": "“Arif”, an environmental journalist investigating the shark fishing trade in Surabaya, Indonesia, speaks to industry insiders to understand the business — from the port where fishermen sell a wide variety of sharks, to a drying facility where sharks are processed before exporters pick them up. Shark fishing is l",
-      },
+                    "about": [
-      "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
+                        "leopard sharks",
-      "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
+                        "Rewilding Sharks",
-      "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.",
+                        "sustainability",
                        "animals"
                    ],
                    "image": {
                        "@type": "ImageObject",
-      "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg",
+                        "url": "https://dam.mediacorp.sg/image/upload/s--0VYzW7We--/c_fill,g_auto,h_338,w_600/f_auto,q_auto/v1/mediacorp/cna/image/2025/03/17/1742148440-image.jpg?itok=rav-cQ_p",
-      "width": 1100,
+                        "width": "100",
-      "height": 756            },
+                        "height": "100"
-      "datePublished": "2021-11-10T08:50:00+03:00",
+                    },
-      "dateModified": "2021-11-10T08:52:53+03:00",
+                    "datePublished": "2025-03-17T01:58:00+08:00",
                    "dateModified": "2025-04-08T12:07:12+08:00",
                    "author": {
                        "@type": "Person",
-      "@id": "https://www.ant1news.gr/",
+                        "@id": "https://www.channelnewsasia.com/",
-      "name": "Ant1news",
+                        "name": "CNA",
-      "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
+                        "url": "https://www.channelnewsasia.com/"
      "url": "https://www.ant1news.gr/"
                    },
                    "publisher": {
                        "@type": "Organization",
-      "@id": "https://www.ant1news.gr#publisher",
+                        "@id": "https://www.channelnewsasia.com/",
-      "name": "Ant1news",
+                        "name": "CNA",
-      "url": "https://www.ant1news.gr",
+                        "url": "https://www.channelnewsasia.com/",
                        "logo": {
                            "@type": "ImageObject",
-      "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
+                            "url": "https://www.channelnewsasia.com/sites/default/themes/mc_cna_theme/images/logo.svg",
-      "width": 400,
+                            "width": "100",
-      "height": 400                },
+                            "height": "100"
      "sameAs": [
      "https://www.facebook.com/Ant1news.gr",
      "https://twitter.com/antennanews",
      "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw",
      "https://www.instagram.com/ant1news/"
      ]
      },
      "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news",
      "articleSection": "Κοινωνία"
                        }
-      ]
+                    },
                    "mainEntityOfPage": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256"
                }, {
                    "@type": "VideoObject",
                    "thumbnailUrl": "https://cf-images.ap-southeast-1.prod.boltdns.net/v1/static/6057984932001/b49a7cc0-bbd3-4634-8049-756f0bf3d0c3/3e2f7ea5-0290-4760-889f-b084117a46e8/1280x720/match/image.jpg",
                    "uploadDate": "2025-04-08T12:07:12+08:00",
                    "description": "“Arif”, an environmental journalist investigating the shark fishing trade in Surabaya, Indonesia, speaks to industry insiders to understand the business — from the port where fishermen sell a wide variety of sharks, to a drying facility where sharks are processed before exporters pick them up. Shark fishing is legal in Indonesia, and sharks bring in good money. Some species are highly sought after. For example, leopard sharks are prized for their special skin. A fisherman revealed that he gets requests for up to 600kg of leopard shark in a month, worth about 282 million rupiah (US$17,000).",
                    "name": "Inside the lucrative trade of shark fishing in Indonesia",
                    "@id": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256",
                    "duration": "PT472S",
                    "embedUrl": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256?view=embed",
                    "contentUrl": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256"
                }]
            }
-                </script>''',
+            </script>
            ''', {
                'title': 'Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia',
                'creators': ['CNA'],
                'description': 'md5:4ce967a72d546b32935cb98c8722346b',
                'modified_timestamp': 1744085232,
                'release_timestamp': 1742147880,
                'thumbnails': [{
                    'height': 100,
                    'url': 'https://dam.mediacorp.sg/image/upload/s--0VYzW7We--/c_fill,g_auto,h_338,w_600/f_auto,q_auto/v1/mediacorp/cna/image/2025/03/17/1742148440-image.jpg?itok=rav-cQ_p',
                    'width': 100,
                }],
            }, {'expected_type': 'NewsArticle'},
        ), (
            # https://github.com/yt-dlp/yt-dlp/pull/2031
            r'''
            <script type="application/ld+json">
            {
-                    'timestamp': 1636523400,
+                "url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
                    'title': 'md5:91fe569e952e4d146485740ae927662b',
                },
                {'expected_type': 'NewsArticle'},
            ),
            (
                r'''<script type="application/ld+json">
                {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
                "name":"Het journaal 19u",
                "description":"Het journaal 19u van vrijdag 31 december 2021.",
-                "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
+                "potentialAction":{
-                "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
+                    "url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8",
                    "@type":"ShareAction"
                },
                "mainEntityOfPage":{
                    "@id":"1640092242445",
                    "@type":"WebPage"
                },
                "publication":[{
                    "startDate":"2021-12-31T19:00:00.000+01:00",
                    "endDate":"2022-01-30T23:55:00.000+01:00",
-                    "publishedBy":{"name":"een","@type":"Organization"},
+                    "publishedBy":{
-                    "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
+                        "name":"een",
                        "@type":"Organization"
                    },
                    "publishedOn":{
                        "url":"https://www.vrt.be/vrtnu/",
                        "name":"VRT NU",
                        "@type":"BroadcastService"
                    },
                    "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
                    "@type":"BroadcastEvent"
                }],
@ -280,11 +319,24 @@ class TestInfoExtractor(unittest.TestCase):
                },
                "genre":["Nieuws en actua"],
                "episodeNumber":365,
-                "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
+                "partOfSeries":{
-                "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
+                    "name":"Het journaal",
-                "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
+                    "@id":"222831405527",
-                ''',
+                    "@type":"TVSeries"
-                {
+                },
                "partOfSeason":{
                    "name":"Seizoen 2021",
                    "@id":"961809365527",
                    "@type":"TVSeason"
                },
                "@context":"https://schema.org",
                "@id":"961685295527",
                "@type":"TVEpisode"
            }
            </script>
            ''', {
                'title': 'Het journaal - Aflevering 365 (Seizoen 2021)',
                'artists': ['Het journaal - Aflevering 365 (Seizoen 2021)'],
                'chapters': [
                    {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440},
                    {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179},
@ -297,50 +349,279 @@ class TestInfoExtractor(unittest.TestCase):
                    {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873},
                    {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23},
                ],
-                    'title': 'Het journaal - Aflevering 365 (Seizoen 2021)',
+                'description': 'Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.',
                'duration': 2079.23,
                'episode': 'Het journaal 19u',
                'episode_number': 365,
                'genres': ['Nieuws en actua'],
                'season': 'Het journaal 19u',
                'series': 'Het journaal',
                'thumbnails': [{'url': 'https://images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg'}],
                'timestamp': 1640973600,
            }, {},
-            ),
+        ), (
-            (
+            # thumbnailUrl, {str}
                # test multiple thumbnails in a list
            r'''
            <script type="application/ld+json">
 {"@context":"https://schema.org",
 "@type":"VideoObject",
 "thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
 </script>''',
            {
                "@context":"https://schema.org",
                "@type":"VideoObject",
                "thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"
            }
            </script>
            ''', {
                'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
-                },
+            }, {},
-                {},
+        ), (
-            ),
+            # no scheme URL
-            (
+            # thumbnail_url, {str}
                # test single thumbnail
            r'''
            <script type="application/ld+json">
-{"@context":"https://schema.org",
+            {
                "@context": "https://schema.org",
                "@type": "VideoObject",
-"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+                "thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
-</script>''',
+            }
            </script>
            ''', {
                'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
            }, {},
        ), (
            # no scheme URL
            # thumbnailURL, {str}
            r'''
            <script type="application/ld+json">
            {
-                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                "@context": "https://schema.org/",
                "@type": "VideoObject",
                "thumbnailURL": "//images.ctfassets.net/o78em1y1w4i4/2XrBpSdjPK1OJXAAFYU8iw/76ffd0f25465502c9a704dcfc2aa6c64/Teaser-Elsevier-careers-video-thumbnail.jpg"
            }
            </script>
            ''', {
                'thumbnails': [{'url': 'https://images.ctfassets.net/o78em1y1w4i4/2XrBpSdjPK1OJXAAFYU8iw/76ffd0f25465502c9a704dcfc2aa6c64/Teaser-Elsevier-careers-video-thumbnail.jpg'}],
            }, {},
        ), (
            # escaped URL
            # image, {"url": {str}}
            r'''
            <script type="application/ld+json">
            {
                "@context": "https:\/\/schema.org",
                "@type": "NewsArticle",
                "image": {
                    "@context": "https:\/\/schema.org",
                    "@type": "ImageObject",
                    "height": 630,
                    "width": 1200,
                    "url": "https:\/\/assets1.cbsnewsstatic.com\/hub\/i\/r\/2025\/08\/29\/597b721d-9c95-424d-8720-05df6b8a4a4e\/thumbnail\/1200x630\/45e846ad5f209972ab225651b40d0b4d\/cbsn-fusion-al-gore-on-20-years-since-hurricane-katrina-thumbnail.jpg"
                }
            }
            </script>
            ''', {
                'thumbnails': [{
                    'height': 630,
                    'url': 'https://assets1.cbsnewsstatic.com/hub/i/r/2025/08/29/597b721d-9c95-424d-8720-05df6b8a4a4e/thumbnail/1200x630/45e846ad5f209972ab225651b40d0b4d/cbsn-fusion-al-gore-on-20-years-since-hurricane-katrina-thumbnail.jpg',
                    'width': 1200,
                }],
            }, {},
        ), (
            # nested width/height
            # image, {"url": {str}}
            r'''
            <script type="application/ld+json">
            {
                "@context": "http://schema.org",
                "@type": "NewsArticle",
                "image": {
                    "@type": "ImageObject",
                    "width": {
                        "@type": "QuantitativeValue",
                        "unitText": "px",
                        "value": 1024
                    },
-                {},
+                    "height": {
-            ),
+                        "@type": "QuantitativeValue",
-            (
+                        "unitText": "px",
-                # test thumbnail_url key without URL scheme
+                        "value": 576
                    },
                    "url": "https://ichef.bbci.co.uk/ace/standard/1024/cpsprodpb/398e/live/1cc4dab0-8689-11f0-9cf6-cbf3e73ce2b9.jpg"
                }
            }
            </script>
            ''', {
                'thumbnails': [{
                    'height': 576,
                    'url': 'https://ichef.bbci.co.uk/ace/standard/1024/cpsprodpb/398e/live/1cc4dab0-8689-11f0-9cf6-cbf3e73ce2b9.jpg',
                    'width': 1024,
                }],
            }, {},
        ), (
            # image, {"url": [{str}]}
            r'''
            <script type="application/ld+json">
            {
-"@context": "https://schema.org",
+                "@context": "https://schema.org/",
                "@type": [
                    "NewsArticle",
                    "Article"
                ],
                "image": {
                    "@type": "ImageObject",
                    "url": [
                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1103&smart=true",
                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1470&smart=true",
                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=980&height=980&smart=true"
                    ]
                }
            }
            </script>
            ''', {
                'thumbnails': [{
                    'height': 1103,
                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1103&smart=true',
                    'width': 1960,
                }, {
                    'height': 1470,
                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1470&smart=true',
                    'width': 1960,
                }, {
                    'height': 980,
                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=980&height=980&smart=true',
                    'width': 980,
                }],
            }, {},
        ), (
            # image, [{"contentUrl": {str}}]
            r'''
            <script type="application/ld+json">
            {
                "@context": "https://schema.org/",
                "@type": "VideoObject",
-"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
+                "image": [
 }</script>''',
                    {
-                    'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
+                        "@type": "ImageObject",
                        "contentUrl": "https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-gseagaleriexl.jpg",
                        "height": 900,
                        "width": 1600
                    },
-                {},
+                    {
-            ),
+                        "@type": "ImageObject",
                        "contentUrl": "https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-HintergrundL.jpg",
                        "height": 1152,
                        "width": 1536
                    }
                ]
            }
            </script>
            ''', {
                'thumbnails': [{
                    'height': 900,
                    'url': 'https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-gseagaleriexl.jpg',
                    'width': 1600,
                }, {
                    'height': 1152,
                    'url': 'https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-HintergrundL.jpg',
                    'width': 1536,
                }],
            }, {},
        ), (
            # duplicate thumbnails
            # image, [{"url": {str}}],
            # thumbnail, [{"url": {str}}]
            # thumbnailUrl, [{str}]
            r'''
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "VideoObject",
                "image": [{
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
                    "width": 1920,
                    "height": 1080
                }, {
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
                    "width": 1400,
                    "height": 1400
                }, {
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280",
                    "width": 1280,
                    "height": 960
                }],
                "thumbnail": [{
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
                    "width": 1920,
                    "height": 1080
                }, {
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
                    "width": 1400,
                    "height": 1400
                }, {
                    "@type": "ImageObject",
                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280",
                    "width": 1280,
                    "height": 960
                }],
                "thumbnailUrl": [
                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280"
                ]
            }
            </script>
            ''', {
                'thumbnails': [{
                    'height': 1080,
                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg',
                    'width': 1920,
                }, {
                    'height': 1400,
                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg',
                    'width': 1400,
                }, {
                    'height': 960,
                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280',
                    'width': 1280,
                }],
            }, {},
        ), (
            # dateCreated, \d{4}
            r'''
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "Movie",
                "dateCreated": "2025"
            }
            </script>
            ''', {'release_year': 2025}, {},
        ), (
            # dateCreated, \d{4}-\d{2}-\d{2}
            r'''
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "Movie",
                "dateCreated": "2025-09-01"
            }
            </script>
            ''', {'upload_date': '20250901'}, {},
        ), (
            # dateCreated, ISO 8601
            r'''
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "Movie",
                "dateCreated": "2025-09-01T00:00:00Z"
            }
            </script>
            ''', {'timestamp': 1756684800}, {},
        )]
        for html, expected_dict, search_json_ld_kwargs in _TESTS:
            expect_dict(
                self,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -72,11 +72,13 @@ from ..utils import (
    mimetype2ext,
    netrc_from_content,
    orderedSet,
    parse_age_limit,
    parse_bitrate,
    parse_codecs,
    parse_duration,
    parse_iso8601,
    parse_m3u8_attributes,
    parse_qs,
    parse_resolution,
    qualities,
    sanitize_url,
@ -84,13 +86,11 @@ from ..utils import (
    str_or_none,
    str_to_int,
    strip_or_none,
    traverse_obj,
    truncate_string,
    try_call,
    try_get,
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
    url_basename,
    url_or_none,
    urlhandle_detect_ext,
@ -102,6 +102,7 @@ from ..utils import (
 )
 from ..utils._utils import _request_dump_filename
 from ..utils.jslib import devalue
 from ..utils.traversal import traverse_obj
 class InfoExtractor:
@ -1673,38 +1674,76 @@ class InfoExtractor:
                chapters[-1]['end_time'] = chapters[-1]['end_time'] or info['duration']
                info['chapters'] = chapters
        def extract_thumbnail_information(e):
            """Collect thumbnail entries from JSON-LD element `e` into info['thumbnails'].

            Reads the 'image', 'thumbnail', 'thumbnailUrl', 'thumbnailURL' and
            'thumbnail_url' properties of `e`. Each resulting entry is a dict with
            a 'url' and, where available, 'height' and 'width'. Missing dimensions
            are filled in from the URL's query string and from its basename.
            Entries sharing a URL are collapsed to the one carrying the most
            dimension keys. Sets info['thumbnails'] to None when nothing is found.
            """
            # Properties of `e` that may hold thumbnail data
            source_keys = (
                'image', 'thumbnail', 'thumbnailUrl', 'thumbnailURL', 'thumbnail_url',
            )
            # Accepted value shapes: a bare URL string, a dict whose 'url' is a list,
            # a dict (or list of dicts) with 'url'/'contentUrl', or a list of URL strings
            value_shapes = (
                ({str}, {url_or_none}, {'url': None}, filter),
                ({dict}, 'url', {list}, ..., {'url': None}, filter),
                (({list}, ({dict}, all)), lambda _, v:
                    url_or_none(v.get('url')) or url_or_none(v.get('contentUrl'))),
                ({list}, ..., {str}, {url_or_none}, {'url': None}, filter),
            )
            # 'height'/'width' may be given directly or nested under a 'value' key
            entry_template = {
                'height': ('height', (None, 'value'), {int_or_none}, any),
                'url': (('contentUrl', 'url'), {str}, {unescapeHTML}, {self._proto_relative_url}, any),
                'width': ('width', (None, 'value'), {int_or_none}, any),
            }
            candidates = traverse_obj(e, (
                source_keys, value_shapes, entry_template,
                all, {orderedSet}, lambda _, v: url_or_none(v['url'])))

            dimension_names = {'height', 'width'}
            # Query-string parameter names consulted for each dimension
            query_aliases = (
                ('height', ('height', 'h')),
                ('width', ('width', 'w')),
            )
            best_by_url = {}
            for entry in candidates:
                thumb_url = entry['url']

                # Dimensions already present on the entry win; the query string
                # is consulted next, then the URL basename
                params = parse_qs(thumb_url)
                for dimension, aliases in query_aliases:
                    from_query = traverse_obj(params, (aliases, -1, {int_or_none}, any))
                    if from_query is not None:
                        entry.setdefault(dimension, from_query)

                from_basename = parse_resolution(url_basename(thumb_url))
                for dimension in dimension_names:
                    guessed = from_basename.get(dimension)
                    if guessed is not None:
                        entry.setdefault(dimension, guessed)

                # For duplicate URLs keep the first entry unless a later one
                # carries strictly more dimension keys
                known = best_by_url.get(thumb_url)
                if known and len(dimension_names & entry.keys()) <= len(dimension_names & known.keys()):
                    continue
                best_by_url[thumb_url] = entry

            info['thumbnails'] = list(best_by_url.values()) or None
        def extract_video_object(e):
-            author = e.get('author')
+            info.update(traverse_obj(e, {
-            info.update({
+                'ext': ('encodingFormat', {mimetype2ext}),
-                'url': url_or_none(e.get('contentUrl')),
+                'title': ('name', {clean_html}, filter),
-                'ext': mimetype2ext(e.get('encodingFormat')),
+                'age_limit': ('isFamilyFriendly', {str}, {lambda x: 18 if x.lower() in ('false', '0') else None}),
-                'title': unescapeHTML(e.get('name')),
+                'artists': (('byArtist', 'name'), {clean_html}, filter, all, {orderedSet}, filter),
-                'description': unescapeHTML(e.get('description')),
+                'description': ('description', {clean_html}, filter),
-                'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
+                'duration': ('duration', {parse_duration}),
-                    'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
+                'filesize': ('contentSize', {float_or_none}, {int_or_none}),
-                })),
+                'genres': ('genre', {clean_html}, filter, all, {orderedSet}, filter),
-                'duration': parse_duration(e.get('duration')),
+                'height': ('height', {int_or_none}),
-                'timestamp': unified_timestamp(e.get('uploadDate')),
+                'is_live': ('publication', 'isLiveBroadcast', {bool}),
-                # author can be an instance of 'Organization' or 'Person' types.
+                'release_timestamp': ('datePublished', {parse_iso8601}),
-                # both types can have 'name' property(inherited from 'Thing' type). [1]
+                'tags': ('keywords', (None, ...), {clean_html},
-                # however some websites are using 'Text' type instead.
+                         {lambda x: x.split(',')}, ..., {str.strip}, filter, all, {orderedSet}, filter),
-                # 1. https://schema.org/VideoObject
+                'tbr': ('bitrate', {int_or_none}),
-                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+                'timestamp': ('uploadDate', {parse_iso8601}),
-                'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
+                'uploader': ('author', (None, 'name'), {clean_html}, filter, any),
-                'filesize': int_or_none(float_or_none(e.get('contentSize'))),
+                'url': ('contentUrl', {self._proto_relative_url}, {url_or_none}),
-                'tbr': int_or_none(e.get('bitrate')),
+                'view_count': ('interactionCount', {int_or_none}),
-                'width': int_or_none(e.get('width')),
+                'width': ('width', {int_or_none}),
-                'height': int_or_none(e.get('height')),
+            }))
                'view_count': int_or_none(e.get('interactionCount')),
                'tags': try_call(lambda: e.get('keywords').split(',')),
            })
            if is_type(e, 'AudioObject'):
                info.update({
                    'abr': traverse_obj(e, ('bitrate', {int_or_none})),
                    'vcodec': 'none',
                    'abr': int_or_none(e.get('bitrate')),
                })
            extract_interaction_statistic(e)
            extract_chapter_information(e)
            extract_thumbnail_information(e)
        def traverse_json_ld(json_ld, at_top_level=True):
            for e in variadic(json_ld):
@@ -1717,40 +1756,70 @@ class InfoExtractor:
                    continue
                if expected_type is not None and not is_type(e, expected_type):
                    continue
-                rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
+                rating = traverse_obj(e, ('aggregateRating', 'ratingValue', {float_or_none}))
                if rating is not None:
                    info['average_rating'] = rating
                if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
-                    episode_name = unescapeHTML(e.get('name'))
+                    info.update(traverse_obj(e, {
-                    info.update({
+                        'id': ('identifier', {str_or_none}),
-                        'episode': episode_name,
+                        'ext': ('encodingFormat', {mimetype2ext}),
-                        'episode_number': int_or_none(e.get('episodeNumber')),
+                        'title': (('title', 'name'), {clean_html}, filter, any),
-                        'description': unescapeHTML(e.get('description')),
+                        'creators': ('productionCompany', {clean_html}, filter, all, {orderedSet}, filter),
-                    })
+                        'description': ('description', {clean_html}, filter),
-                    if not info.get('title') and episode_name:
+                        'duration': ((('duration', {parse_duration}), ('timeRequired', {int_or_none})), any),
-                        info['title'] = episode_name
+                        'episode': ('name', {clean_html}, filter),
                        'episode_number': ('episodeNumber', {int_or_none}),
                        'genres': ('genre', ..., {clean_html}, filter, all, {orderedSet}, filter),
                        'release_timestamp': ('datePublished', {parse_iso8601}),
                    }))
                    extract_thumbnail_information(e)
                    part_of_season = e.get('partOfSeason')
                    if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
-                        info.update({
+                        info.update(traverse_obj(e, {
-                            'season': unescapeHTML(part_of_season.get('name')),
+                            'season': ('name', {clean_html}, filter),
-                            'season_number': int_or_none(part_of_season.get('seasonNumber')),
+                            'season_number': ('seasonNumber', {int_or_none}),
-                        })
+                        }))
                    part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                    if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
-                        info['series'] = unescapeHTML(part_of_series.get('name'))
+                        info['series'] = traverse_obj(part_of_series, ('name', {clean_html}, filter))
                elif is_type(e, 'Movie'):
-                    info.update({
+                    info.update(traverse_obj(e, {
-                        'title': unescapeHTML(e.get('name')),
+                        'title': ('name', {clean_html}, filter),
-                        'description': unescapeHTML(e.get('description')),
+                        'age_limit': ('contentRating', {parse_age_limit}),
-                        'duration': parse_duration(e.get('duration')),
+                        'cast': ('actor', ..., 'name', {clean_html}, filter, all, {orderedSet}, filter),
-                        'timestamp': unified_timestamp(e.get('dateCreated')),
+                        'creators': ('director', (None, ((None, ...), 'name')), {clean_html}, filter, all, {orderedSet}, filter),
-                    })
+                        'description': ('description', {clean_html}, filter),
                        'duration': ('duration', {parse_duration}),
                        'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
                        'release_timestamp': ('datePublished', {parse_iso8601}),
                    }))
                    extract_thumbnail_information(e)
                    if date := traverse_obj(e, ('dateCreated', {str_or_none})):
                        if re.fullmatch(r'\d{4}', date):
                            info['release_year'] = int_or_none(date)
                        elif re.fullmatch(r'\d{4}-\d{2}-\d{2}', date):
                            info['upload_date'] = unified_strdate(date)
                        else:
                            info['timestamp'] = parse_iso8601(date)
                elif is_type(e, 'Article', 'NewsArticle'):
-                    info.update({
+                    info.update(traverse_obj(e, {
-                        'timestamp': parse_iso8601(e.get('datePublished')),
+                        'title': ('headline', {clean_html}, filter),
-                        'title': unescapeHTML(e.get('headline')),
+                        'alt_title': ('alternativeHeadline', {clean_html}, filter),
-                        'description': unescapeHTML(e.get('articleBody') or e.get('description')),
+                        'categories': ('articleSection', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
-                    })
+                        'comment_count': ('commentCount', {int_or_none}),
                        'creators': ('author', (None, ...), 'name', {clean_html}, filter, all, {orderedSet}, filter),
                        'description': (('description', 'articleBody'), {clean_html}, filter, any),
                        'duration': ('timeRequired', {int_or_none}),
                        'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
                        'location': ('contentLocation', 'name', {clean_html}, filter),
                        'modified_timestamp': ('dateModified', {parse_iso8601}),
                        'release_timestamp': ('datePublished', {parse_iso8601}),
                        'tags': ('keywords', (None, ...), {clean_html},
                                 {lambda x: x.split(',')}, ..., {str.strip}, filter, all, {orderedSet}, filter),
                        'timestamp': ('dateCreated', {parse_iso8601}),
                        'uploader': ('publisher', 'name', {clean_html}, filter),
                    }))
                    extract_thumbnail_information(e)
                    if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
                        extract_video_object(e['video'][0])
                    elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):