Merge 117300e1bd into 4429fd0450

2 days ago · 774f00daf4
parent 4429fd0450 117300e1bd
commit 774f00daf4
2 changed files with 576 additions and 226 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -132,130 +132,169 @@ class TestInfoExtractor(unittest.TestCase):
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)

    def test_search_json_ld_realworld(self):
-        _TESTS = [
+        _TESTS = [(
            # https://github.com/ytdl-org/youtube-dl/issues/23306
-            (
-                r'''<script type="application/ld+json">
-{
-"@context": "http://schema.org/",
-"@type": "VideoObject",
-"name": "1 On 1 With Kleio",
-"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
-"duration": "PT0H12M23S",
-"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
-"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
-"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
-"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
-"width": "1920",
-"height": "1080",
-"encodingFormat": "mp4",
-"bitrate": "6617kbps",
-"isFamilyFriendly": "False",
-"description": "Kleio Valentien",
-"uploadDate": "2015-12-05T21:24:35+01:00",
-"interactionStatistic": {
-"@type": "InteractionCounter",
-"interactionType": { "@type": "http://schema.org/WatchAction" },
-"userInteractionCount": 1120958
-}, "aggregateRating": {
-"@type": "AggregateRating",
-"ratingValue": "88",
-"ratingCount": "630",
-"bestRating": "100",
-"worstRating": "0"
-}, "actor": [{
-"@type": "Person",
-"name": "Kleio Valentien",
-"url": "https://www.eporner.com/pornstar/kleio-valentien/"
-}]}
-                </script>''',
-                {
-                    'title': '1 On 1 With Kleio',
-                    'description': 'Kleio Valentien',
-                    'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
-                    'timestamp': 1449347075,
-                    'duration': 743.0,
-                    'view_count': 1120958,
-                    'width': 1920,
-                    'height': 1080,
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "http://schema.org/",
+                "@type": "VideoObject",
+                "name": "1 On 1 With Kleio",
+                "url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
+                "duration": "PT0H12M23S",
+                "thumbnailUrl": [
+                    "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+                    "https://imggen.eporner.com/780814/1920/1080/9.jpg"
+                ],
+                "contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
+                "embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
+                "image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
+                "width": "1920",
+                "height": "1080",
+                "encodingFormat": "mp4",
+                "bitrate": "6617kbps",
+                "isFamilyFriendly": "False",
+                "description": "Kleio Valentien",
+                "uploadDate": "2015-12-05T21:24:35+01:00",
+                "interactionStatistic": {
+                    "@type": "InteractionCounter",
+                    "interactionType": {
+                        "@type": "http://schema.org/WatchAction"
+                    },
+                    "userInteractionCount": 1120958
                },
-                {},
-            ),
-            (
-                r'''<script type="application/ld+json">
-      {
-      "@context": "https://schema.org",
-      "@graph": [
-      {
-      "@type": "NewsArticle",
-      "mainEntityOfPage": {
-      "@type": "WebPage",
-      "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn"
-      },
-      "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
-      "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
-      "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.",
-      "image": {
-      "@type": "ImageObject",
-      "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg",
-      "width": 1100,
-      "height": 756            },
-      "datePublished": "2021-11-10T08:50:00+03:00",
-      "dateModified": "2021-11-10T08:52:53+03:00",
-      "author": {
-      "@type": "Person",
-      "@id": "https://www.ant1news.gr/",
-      "name": "Ant1news",
-      "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
-      "url": "https://www.ant1news.gr/"
-      },
-      "publisher": {
-      "@type": "Organization",
-      "@id": "https://www.ant1news.gr#publisher",
-      "name": "Ant1news",
-      "url": "https://www.ant1news.gr",
-      "logo": {
-      "@type": "ImageObject",
-      "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
-      "width": 400,
-      "height": 400                },
-      "sameAs": [
-      "https://www.facebook.com/Ant1news.gr",
-      "https://twitter.com/antennanews",
-      "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw",
-      "https://www.instagram.com/ant1news/"
-      ]
-      },
-
-      "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news",
-
-
-      "articleSection": "Κοινωνία"
-      }
-      ]
-      }
-                </script>''',
-                {
-                    'timestamp': 1636523400,
-                    'title': 'md5:91fe569e952e4d146485740ae927662b',
+                "aggregateRating": {
+                    "@type": "AggregateRating",
+                    "ratingValue": "88",
+                    "ratingCount": "630",
+                    "bestRating": "100",
+                    "worstRating": "0"
                },
-                {'expected_type': 'NewsArticle'},
-            ),
-            (
-                r'''<script type="application/ld+json">
-                {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
+                "actor": [{
+                    "@type": "Person",
+                    "name": "Kleio Valentien",
+                    "url": "https://www.eporner.com/pornstar/kleio-valentien/"
+                }]
+            }
+            </script>
+            ''', {
+                'ext': 'mp4',
+                'title': '1 On 1 With Kleio',
+                'age_limit': 18,
+                'artists': ['1 On 1 With Kleio'],
+                'average_rating': 88,
+                'description': 'Kleio Valentien',
+                'duration': 743,
+                'height': 1080,
+                'thumbnails': [
+                    {'url': 'https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg'},
+                    {'url': 'https://imggen.eporner.com/780814/1920/1080/9.jpg'},
+                ],
+                'timestamp': 1449347075,
+                'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+                'view_count': 1120958,
+                'width': 1920,
+            }, {},
+        ), (
+            # https://github.com/yt-dlp/yt-dlp/pull/1983
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@graph": [{
+                    "@type": "NewsArticle",
+                    "name": "Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia",
+                    "headline": "Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia",
+                    "description": "“Arif”, an environmental journalist investigating the shark fishing trade in Surabaya, Indonesia, speaks to industry insiders to understand the business — from the port where fishermen sell a wide variety of sharks, to a drying facility where sharks are processed before exporters pick them up. Shark fishing is l",
+                    "about": [
+                        "leopard sharks",
+                        "Rewilding Sharks",
+                        "sustainability",
+                        "animals"
+                    ],
+                    "image": {
+                        "@type": "ImageObject",
+                        "url": "https://dam.mediacorp.sg/image/upload/s--0VYzW7We--/c_fill,g_auto,h_338,w_600/f_auto,q_auto/v1/mediacorp/cna/image/2025/03/17/1742148440-image.jpg?itok=rav-cQ_p",
+                        "width": "100",
+                        "height": "100"
+                    },
+                    "datePublished": "2025-03-17T01:58:00+08:00",
+                    "dateModified": "2025-04-08T12:07:12+08:00",
+                    "author": {
+                        "@type": "Person",
+                        "@id": "https://www.channelnewsasia.com/",
+                        "name": "CNA",
+                        "url": "https://www.channelnewsasia.com/"
+                    },
+                    "publisher": {
+                        "@type": "Organization",
+                        "@id": "https://www.channelnewsasia.com/",
+                        "name": "CNA",
+                        "url": "https://www.channelnewsasia.com/",
+                        "logo": {
+                            "@type": "ImageObject",
+                            "url": "https://www.channelnewsasia.com/sites/default/themes/mc_cna_theme/images/logo.svg",
+                            "width": "100",
+                            "height": "100"
+                        }
+                    },
+                    "mainEntityOfPage": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256"
+                }, {
+                    "@type": "VideoObject",
+                    "thumbnailUrl": "https://cf-images.ap-southeast-1.prod.boltdns.net/v1/static/6057984932001/b49a7cc0-bbd3-4634-8049-756f0bf3d0c3/3e2f7ea5-0290-4760-889f-b084117a46e8/1280x720/match/image.jpg",
+                    "uploadDate": "2025-04-08T12:07:12+08:00",
+                    "description": "“Arif”, an environmental journalist investigating the shark fishing trade in Surabaya, Indonesia, speaks to industry insiders to understand the business — from the port where fishermen sell a wide variety of sharks, to a drying facility where sharks are processed before exporters pick them up. Shark fishing is legal in Indonesia, and sharks bring in good money. Some species are highly sought after. For example, leopard sharks are prized for their special skin. A fisherman revealed that he gets requests for up to 600kg of leopard shark in a month, worth about 282 million rupiah (US$17,000).",
+                    "name": "Inside the lucrative trade of shark fishing in Indonesia",
+                    "@id": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256",
+                    "duration": "PT472S",
+                    "embedUrl": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256?view=embed",
+                    "contentUrl": "https://www.channelnewsasia.com/watch/rewilding-sharks/inside-lucrative-trade-shark-fishing-indonesia-5004256"
+                }]
+            }
+            </script>
+            ''', {
+                'title': 'Rewilding Sharks - Inside the lucrative trade of shark fishing in Indonesia',
+                'creators': ['CNA'],
+                'description': 'md5:4ce967a72d546b32935cb98c8722346b',
+                'modified_timestamp': 1744085232,
+                'release_timestamp': 1742147880,
+                'thumbnails': [{
+                    'height': 100,
+                    'url': 'https://dam.mediacorp.sg/image/upload/s--0VYzW7We--/c_fill,g_auto,h_338,w_600/f_auto,q_auto/v1/mediacorp/cna/image/2025/03/17/1742148440-image.jpg?itok=rav-cQ_p',
+                    'width': 100,
+                }],
+            }, {'expected_type': 'NewsArticle'},
+        ), (
+            # https://github.com/yt-dlp/yt-dlp/pull/2031
+            r'''
+            <script type="application/ld+json">
+            {
+                "url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
                "name":"Het journaal 19u",
                "description":"Het journaal 19u van vrijdag 31 december 2021.",
-                "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
-                "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
+                "potentialAction":{
+                    "url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8",
+                    "@type":"ShareAction"
+                },
+                "mainEntityOfPage":{
+                    "@id":"1640092242445",
+                    "@type":"WebPage"
+                },
                "publication":[{
                    "startDate":"2021-12-31T19:00:00.000+01:00",
                    "endDate":"2022-01-30T23:55:00.000+01:00",
-                    "publishedBy":{"name":"een","@type":"Organization"},
-                    "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
+                    "publishedBy":{
+                        "name":"een",
+                        "@type":"Organization"
+                    },
+                    "publishedOn":{
+                        "url":"https://www.vrt.be/vrtnu/",
+                        "name":"VRT NU",
+                        "@type":"BroadcastService"
+                    },
                    "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
                    "@type":"BroadcastEvent"
-                    }],
+                }],
                "video":{
                    "name":"Het journaal - Aflevering 365 (Seizoen 2021)",
                    "description":"Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.",
@@ -272,7 +311,7 @@ class TestInfoExtractor(unittest.TestCase):
                        {"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"},
                        {"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"},
                        {"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"}
-                        ],
+                    ],
                    "duration":"PT34M39.23S",
                    "uploadDate":"2021-12-31T19:00:00.000+01:00",
                    "@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5",
@@ -280,67 +319,309 @@ class TestInfoExtractor(unittest.TestCase):
                },
                "genre":["Nieuws en actua"],
                "episodeNumber":365,
-                "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
-                "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
-                "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
-                ''',
-                {
-                    'chapters': [
-                        {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440},
-                        {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179},
-                        {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263},
-                        {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367},
-                        {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383},
-                        {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484},
-                        {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575},
-                        {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728},
-                        {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873},
-                        {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23},
-                    ],
-                    'title': 'Het journaal - Aflevering 365 (Seizoen 2021)',
-                }, {},
-            ),
-            (
-                # test multiple thumbnails in a list
-                r'''
-<script type="application/ld+json">
-{"@context":"https://schema.org",
-"@type":"VideoObject",
-"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
-</script>''',
-                {
-                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                "partOfSeries":{
+                    "name":"Het journaal",
+                    "@id":"222831405527",
+                    "@type":"TVSeries"
                },
-                {},
-            ),
-            (
-                # test single thumbnail
-                r'''
-<script type="application/ld+json">
-{"@context":"https://schema.org",
-"@type":"VideoObject",
-"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
-</script>''',
-                {
-                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                "partOfSeason":{
+                    "name":"Seizoen 2021",
+                    "@id":"961809365527",
+                    "@type":"TVSeason"
                },
-                {},
-            ),
-            (
-                # test thumbnail_url key without URL scheme
-                r'''
-<script type="application/ld+json">
-{
-"@context": "https://schema.org",
-"@type": "VideoObject",
-"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
-}</script>''',
-                {
-                    'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
-                },
-                {},
-            ),
-        ]
+                "@context":"https://schema.org",
+                "@id":"961685295527",
+                "@type":"TVEpisode"
+            }
+            </script>
+            ''', {
+                'title': 'Het journaal - Aflevering 365 (Seizoen 2021)',
+                'artists': ['Het journaal - Aflevering 365 (Seizoen 2021)'],
+                'chapters': [
+                    {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440},
+                    {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179},
+                    {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263},
+                    {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367},
+                    {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383},
+                    {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484},
+                    {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575},
+                    {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728},
+                    {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873},
+                    {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23},
+                ],
+                'description': 'Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.',
+                'duration': 2079.23,
+                'episode': 'Het journaal 19u',
+                'episode_number': 365,
+                'genres': ['Nieuws en actua'],
+                'season': 'Het journaal 19u',
+                'series': 'Het journaal',
+                'thumbnails': [{'url': 'https://images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg'}],
+                'timestamp': 1640973600,
+            }, {},
+        ), (
+            # thumbnailUrl, {str}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context":"https://schema.org",
+                "@type":"VideoObject",
+                "thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"
+            }
+            </script>
+            ''', {
+                'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+            }, {},
+        ), (
+            # no scheme URL
+            # thumbnail_url, {str}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@type": "VideoObject",
+                "thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
+            }
+            </script>
+            ''', {
+                'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
+            }, {},
+        ), (
+            # no scheme URL
+            # thumbnailURL, {str}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org/",
+                "@type": "VideoObject",
+                "thumbnailURL": "//images.ctfassets.net/o78em1y1w4i4/2XrBpSdjPK1OJXAAFYU8iw/76ffd0f25465502c9a704dcfc2aa6c64/Teaser-Elsevier-careers-video-thumbnail.jpg"
+            }
+            </script>
+            ''', {
+                'thumbnails': [{'url': 'https://images.ctfassets.net/o78em1y1w4i4/2XrBpSdjPK1OJXAAFYU8iw/76ffd0f25465502c9a704dcfc2aa6c64/Teaser-Elsevier-careers-video-thumbnail.jpg'}],
+            }, {},
+        ), (
+            # escaped URL
+            # image, {"url": {str}}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https:\/\/schema.org",
+                "@type": "NewsArticle",
+                "image": {
+                    "@context": "https:\/\/schema.org",
+                    "@type": "ImageObject",
+                    "height": 630,
+                    "width": 1200,
+                    "url": "https:\/\/assets1.cbsnewsstatic.com\/hub\/i\/r\/2025\/08\/29\/597b721d-9c95-424d-8720-05df6b8a4a4e\/thumbnail\/1200x630\/45e846ad5f209972ab225651b40d0b4d\/cbsn-fusion-al-gore-on-20-years-since-hurricane-katrina-thumbnail.jpg"
+                }
+            }
+            </script>
+            ''', {
+                'thumbnails': [{
+                    'height': 630,
+                    'url': 'https://assets1.cbsnewsstatic.com/hub/i/r/2025/08/29/597b721d-9c95-424d-8720-05df6b8a4a4e/thumbnail/1200x630/45e846ad5f209972ab225651b40d0b4d/cbsn-fusion-al-gore-on-20-years-since-hurricane-katrina-thumbnail.jpg',
+                    'width': 1200,
+                }],
+            }, {},
+        ), (
+            # nested width/height
+            # image, {"url": {str}}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "http://schema.org",
+                "@type": "NewsArticle",
+                "image": {
+                    "@type": "ImageObject",
+                    "width": {
+                        "@type": "QuantitativeValue",
+                        "unitText": "px",
+                        "value": 1024
+                    },
+                    "height": {
+                        "@type": "QuantitativeValue",
+                        "unitText": "px",
+                        "value": 576
+                    },
+                    "url": "https://ichef.bbci.co.uk/ace/standard/1024/cpsprodpb/398e/live/1cc4dab0-8689-11f0-9cf6-cbf3e73ce2b9.jpg"
+                }
+            }
+            </script>
+            ''', {
+                'thumbnails': [{
+                    'height': 576,
+                    'url': 'https://ichef.bbci.co.uk/ace/standard/1024/cpsprodpb/398e/live/1cc4dab0-8689-11f0-9cf6-cbf3e73ce2b9.jpg',
+                    'width': 1024,
+                }],
+            }, {},
+        ), (
+            # image, {"url": [{str}]}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org/",
+                "@type": [
+                    "NewsArticle",
+                    "Article"
+                ],
+                "image": {
+                    "@type": "ImageObject",
+                    "url": [
+                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1103&smart=true",
+                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1470&smart=true",
+                        "https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=980&height=980&smart=true"
+                    ]
+                }
+            }
+            </script>
+            ''', {
+                'thumbnails': [{
+                    'height': 1103,
+                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1103&smart=true',
+                    'width': 1960,
+                }, {
+                    'height': 1470,
+                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=1960&height=1470&smart=true',
+                    'width': 1960,
+                }, {
+                    'height': 980,
+                    'url': 'https://imagenes.elpais.com/resizer/v2/JUFK7HEHNNAIVJXC3WVUXNZT7M.jpg?auth=350ef4714331cf2e29299b840f86da4eb8b0ddde3a3bdb3f4302f0c90a9ae2d6&width=980&height=980&smart=true',
+                    'width': 980,
+                }],
+            }, {},
+        ), (
+            # image, [{"contentUrl": {str}}]
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org/",
+                "@type": "VideoObject",
+                "image": [
+                    {
+                        "@type": "ImageObject",
+                        "contentUrl": "https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-gseagaleriexl.jpg",
+                        "height": 900,
+                        "width": 1600
+                    },
+                    {
+                        "@type": "ImageObject",
+                        "contentUrl": "https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-HintergrundL.jpg",
+                        "height": 1152,
+                        "width": 1536
+                    }
+                ]
+            }
+            </script>
+            ''', {
+                'thumbnails': [{
+                    'height': 900,
+                    'url': 'https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-gseagaleriexl.jpg',
+                    'width': 1600,
+                }, {
+                    'height': 1152,
+                    'url': 'https://www.planet-wissen.de/sendungen/lebensglueck-frau-sprung-strand-100~_v-HintergrundL.jpg',
+                    'width': 1536,
+                }],
+            }, {},
+        ), (
+            # duplicate thumbnails
+            # image, [{"url": {str}}],
+            # thumbnail, [{"url": {str}}]
+            # thumbnailUrl, [{str}]
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@type": "VideoObject",
+                "image": [{
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
+                    "width": 1920,
+                    "height": 1080
+                }, {
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
+                    "width": 1400,
+                    "height": 1400
+                }, {
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280",
+                    "width": 1280,
+                    "height": 960
+                }],
+                "thumbnail": [{
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
+                    "width": 1920,
+                    "height": 1080
+                }, {
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
+                    "width": 1400,
+                    "height": 1400
+                }, {
+                    "@type": "ImageObject",
+                    "url": "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280",
+                    "width": 1280,
+                    "height": 960
+                }],
+                "thumbnailUrl": [
+                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg",
+                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg",
+                    "https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280"
+                ]
+            }
+            </script>
+            ''', {
+                'thumbnails': [{
+                    'height': 1080,
+                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiLwU/16x9-1920/leverkusen-322.jpg',
+                    'width': 1920,
+                }, {
+                    'height': 1400,
+                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJibh0/1x1-1400/leverkusen-322.jpg',
+                    'width': 1400,
+                }, {
+                    'height': 960,
+                    'url': 'https://images.sportschau.de/image/031d1445-9620-4b24-b1f5-9cbbff422f21/AAABmQQq_vA/AAABmKJiwCE/4x3/leverkusen-322.jpg?width=1280',
+                    'width': 1280,
+                }],
+            }, {},
+        ), (
+            # dateCreated, \d{4}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@type": "Movie",
+                "dateCreated": "2025"
+            }
+            </script>
+            ''', {'release_year': 2025}, {},
+        ), (
+            # dateCreated, \d{4}-\d{2}-\d{2}
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@type": "Movie",
+                "dateCreated": "2025-09-01"
+            }
+            </script>
+            ''', {'upload_date': '20250901'}, {},
+        ), (
+            # dateCreated, ISO 8601
+            r'''
+            <script type="application/ld+json">
+            {
+                "@context": "https://schema.org",
+                "@type": "Movie",
+                "dateCreated": "2025-09-01T00:00:00Z"
+            }
+            </script>
+            ''', {'timestamp': 1756684800}, {},
+        )]
        for html, expected_dict, search_json_ld_kwargs in _TESTS:
            expect_dict(
                self,
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -72,11 +72,13 @@ from ..utils import (
    mimetype2ext,
    netrc_from_content,
    orderedSet,
+    parse_age_limit,
    parse_bitrate,
    parse_codecs,
    parse_duration,
    parse_iso8601,
    parse_m3u8_attributes,
+    parse_qs,
    parse_resolution,
    qualities,
    sanitize_url,
@ -84,13 +86,11 @@ from ..utils import (
    str_or_none,
    str_to_int,
    strip_or_none,
-    traverse_obj,
    truncate_string,
    try_call,
    try_get,
    unescapeHTML,
    unified_strdate,
-    unified_timestamp,
    url_basename,
    url_or_none,
    urlhandle_detect_ext,
@ -102,6 +102,7 @@ from ..utils import (
 )
 from ..utils._utils import _request_dump_filename
 from ..utils.jslib import devalue
+from ..utils.traversal import traverse_obj


 class InfoExtractor:
@ -1673,38 +1674,76 @@ class InfoExtractor:
                chapters[-1]['end_time'] = chapters[-1]['end_time'] or info['duration']
                info['chapters'] = chapters

+        def extract_thumbnail_information(e):  # collect thumbnails de-duplicated by URL, with dimensions where known
+            thumbnails = traverse_obj(e, ((
+                'image', 'thumbnail', 'thumbnailUrl', 'thumbnailURL', 'thumbnail_url',
+            ), (
+                ({str}, {url_or_none}, {'url': None}, filter),
+                ({dict}, 'url', {list}, ..., {'url': None}, filter),
+                (({list}, ({dict}, all)), lambda _, v:
+                    url_or_none(v.get('url')) or url_or_none(v.get('contentUrl'))),
+                ({list}, ..., {str}, {url_or_none}, {'url': None}, filter),
+            ), {
+                'height': ('height', (None, 'value'), {int_or_none}, any),
+                'url': (('contentUrl', 'url'), {str}, {unescapeHTML}, {self._proto_relative_url}, any),
+                'width': ('width', (None, 'value'), {int_or_none}, any),
+            }, all, {orderedSet}, lambda _, v: url_or_none(v['url'])))
+
+            dim_keys, url_table = {'height', 'width'}, {}
+            for thumbnail in thumbnails:
+                url = thumbnail['url']
+
+                query = parse_qs(url)
+                for key, alt_keys in (
+                    ('height', ('height', 'h')),
+                    ('width', ('width', 'w')),
+                ):
+                    val = traverse_obj(query, (alt_keys, -1, {int_or_none}, any))
+                    if val is not None:
+                        thumbnail.setdefault(key, val)
+
+                res = parse_resolution(url_basename(url))
+                for key in dim_keys:
+                    val = res.get(key)
+                    if val is not None:
+                        thumbnail.setdefault(key, val)
+
+                current = url_table.get(url)  # per URL, keep the entry that knows the most dimensions
+                if not current or len(dim_keys & thumbnail.keys()) > len(dim_keys & current.keys()):
+                    url_table[url] = thumbnail
+
+            info['thumbnails'] = list(url_table.values()) or info.get('thumbnails')  # don't clobber an earlier entity's thumbnails
+
        def extract_video_object(e):
-            author = e.get('author')
-            info.update({
-                'url': url_or_none(e.get('contentUrl')),
-                'ext': mimetype2ext(e.get('encodingFormat')),
-                'title': unescapeHTML(e.get('name')),
-                'description': unescapeHTML(e.get('description')),
-                'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
-                    'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
-                })),
-                'duration': parse_duration(e.get('duration')),
-                'timestamp': unified_timestamp(e.get('uploadDate')),
-                # author can be an instance of 'Organization' or 'Person' types.
-                # both types can have 'name' property(inherited from 'Thing' type). [1]
-                # however some websites are using 'Text' type instead.
-                # 1. https://schema.org/VideoObject
-                'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
-                'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
-                'filesize': int_or_none(float_or_none(e.get('contentSize'))),
-                'tbr': int_or_none(e.get('bitrate')),
-                'width': int_or_none(e.get('width')),
-                'height': int_or_none(e.get('height')),
-                'view_count': int_or_none(e.get('interactionCount')),
-                'tags': try_call(lambda: e.get('keywords').split(',')),
-            })
+            info.update(traverse_obj(e, {
+                'ext': ('encodingFormat', {mimetype2ext}),
+                'title': ('name', {clean_html}, filter),
+                'age_limit': ('isFamilyFriendly', {str}, {lambda x: 18 if x.lower() in ('false', '0') else None}),
+                'artists': ('byArtist', (None, ((None, ...), 'name')), {clean_html}, filter, all, {orderedSet}, filter),  # names under byArtist only
+                'description': ('description', {clean_html}, filter),
+                'duration': ('duration', {parse_duration}),
+                'filesize': ('contentSize', {float_or_none}, {int_or_none}),
+                'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
+                'height': ('height', {int_or_none}),
+                'is_live': ('publication', 'isLiveBroadcast', {bool}),
+                'release_timestamp': ('datePublished', {parse_iso8601}),
+                'tags': ('keywords', (None, ...), {clean_html},
+                         {lambda x: x.split(',')}, ..., {str.strip}, filter, all, {orderedSet}, filter),
+                'tbr': ('bitrate', {int_or_none}),
+                'timestamp': ('uploadDate', {parse_iso8601}),
+                'uploader': ('author', (None, 'name'), {clean_html}, filter, any),
+                'url': ('contentUrl', {self._proto_relative_url}, {url_or_none}),
+                'view_count': ('interactionCount', {int_or_none}),
+                'width': ('width', {int_or_none}),
+            }))
            if is_type(e, 'AudioObject'):
                info.update({
+                    'abr': traverse_obj(e, ('bitrate', {int_or_none})),
                    'vcodec': 'none',
-                    'abr': int_or_none(e.get('bitrate')),
                })
            extract_interaction_statistic(e)
            extract_chapter_information(e)
+            extract_thumbnail_information(e)

        def traverse_json_ld(json_ld, at_top_level=True):
            for e in variadic(json_ld):
@ -1717,40 +1756,70 @@ class InfoExtractor:
                    continue
                if expected_type is not None and not is_type(e, expected_type):
                    continue
-                rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
+                rating = traverse_obj(e, ('aggregateRating', 'ratingValue', {float_or_none}))
                if rating is not None:
                    info['average_rating'] = rating
                if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
-                    episode_name = unescapeHTML(e.get('name'))
-                    info.update({
-                        'episode': episode_name,
-                        'episode_number': int_or_none(e.get('episodeNumber')),
-                        'description': unescapeHTML(e.get('description')),
-                    })
-                    if not info.get('title') and episode_name:
-                        info['title'] = episode_name
+                    info.update(traverse_obj(e, {
+                        'id': ('identifier', {str_or_none}),
+                        'ext': ('encodingFormat', {mimetype2ext}),
+                        'title': (('title', 'name'), {clean_html}, filter, any),
+                        'creators': ('productionCompany', {clean_html}, filter, all, {orderedSet}, filter),
+                        'description': ('description', {clean_html}, filter),
+                        'duration': ((('duration', {parse_duration}), ('timeRequired', {int_or_none})), any),
+                        'episode': ('name', {clean_html}, filter),
+                        'episode_number': ('episodeNumber', {int_or_none}),
+                        'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
+                        'release_timestamp': ('datePublished', {parse_iso8601}),
+                    }))
+                    extract_thumbnail_information(e)
                    part_of_season = e.get('partOfSeason')
                    if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
-                        info.update({
-                            'season': unescapeHTML(part_of_season.get('name')),
-                            'season_number': int_or_none(part_of_season.get('seasonNumber')),
-                        })
+                        info.update(traverse_obj(part_of_season, {  # season fields live on partOfSeason, not the episode
+                            'season': ('name', {clean_html}, filter),
+                            'season_number': ('seasonNumber', {int_or_none}),
+                        }))
                    part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
                    if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
-                        info['series'] = unescapeHTML(part_of_series.get('name'))
+                        info['series'] = traverse_obj(part_of_series, ('name', {clean_html}, filter))
                elif is_type(e, 'Movie'):
-                    info.update({
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
-                        'duration': parse_duration(e.get('duration')),
-                        'timestamp': unified_timestamp(e.get('dateCreated')),
-                    })
+                    info.update(traverse_obj(e, {
+                        'title': ('name', {clean_html}, filter),
+                        'age_limit': ('contentRating', {parse_age_limit}),
+                        'cast': ('actor', ..., 'name', {clean_html}, filter, all, {orderedSet}, filter),
+                        'creators': ('director', (None, ((None, ...), 'name')), {clean_html}, filter, all, {orderedSet}, filter),
+                        'description': ('description', {clean_html}, filter),
+                        'duration': ('duration', {parse_duration}),
+                        'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
+                        'release_timestamp': ('datePublished', {parse_iso8601}),
+                    }))
+                    extract_thumbnail_information(e)
+                    if date := traverse_obj(e, ('dateCreated', {str_or_none})):
+                        if re.fullmatch(r'\d{4}', date):
+                            info['release_year'] = int_or_none(date)
+                        elif re.fullmatch(r'\d{4}-\d{2}-\d{2}', date):
+                            info['upload_date'] = unified_strdate(date)
+                        else:
+                            info['timestamp'] = parse_iso8601(date)
                elif is_type(e, 'Article', 'NewsArticle'):
-                    info.update({
-                        'timestamp': parse_iso8601(e.get('datePublished')),
-                        'title': unescapeHTML(e.get('headline')),
-                        'description': unescapeHTML(e.get('articleBody') or e.get('description')),
-                    })
+                    info.update(traverse_obj(e, {
+                        'title': ('headline', {clean_html}, filter),
+                        'alt_title': ('alternativeHeadline', {clean_html}, filter),
+                        'categories': ('articleSection', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
+                        'comment_count': ('commentCount', {int_or_none}),
+                        'creators': ('author', (None, ...), 'name', {clean_html}, filter, all, {orderedSet}, filter),
+                        'description': (('description', 'articleBody'), {clean_html}, filter, any),
+                        'duration': ('timeRequired', {int_or_none}),
+                        'genres': ('genre', (None, ...), {clean_html}, filter, all, {orderedSet}, filter),
+                        'location': ('contentLocation', 'name', {clean_html}, filter),
+                        'modified_timestamp': ('dateModified', {parse_iso8601}),
+                        'release_timestamp': ('datePublished', {parse_iso8601}),
+                        'tags': ('keywords', (None, ...), {clean_html},
+                                 {lambda x: x.split(',')}, ..., {str.strip}, filter, all, {orderedSet}, filter),
+                        'timestamp': ('dateCreated', {parse_iso8601}),
+                        'uploader': ('publisher', 'name', {clean_html}, filter),
+                    }))
+                    extract_thumbnail_information(e)
                    if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
                        extract_video_object(e['video'][0])
                    elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):