From 18fe696df9d60804a8f5cb8cd74f38111d6eb711 Mon Sep 17 00:00:00 2001
From: Gegham Zakaryan <zakaryan.2004@outlook.com>
Date: Thu, 28 Aug 2025 05:12:08 +0400
Subject: [PATCH] [ie/googledrive] Fix subtitles extraction (#14139)

Authored by: zakaryan2004
---
 yt_dlp/extractor/googledrive.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index dfba2d3ba1..0c84f0b241 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -12,6 +12,7 @@ from ..utils import (
     get_element_html_by_id,
     int_or_none,
     lowercase_escape,
+    parse_qs,
     try_get,
     update_url_query,
 )
@@ -111,14 +112,18 @@ class GoogleDriveIE(InfoExtractor):
                     self._caption_formats_ext.append(f.attrib['fmt_code'])
 
     def _get_captions_by_type(self, video_id, subtitles_id, caption_type,
-                              origin_lang_code=None):
+                              origin_lang_code=None, origin_lang_name=None):
         if not subtitles_id or not caption_type:
             return
         captions = {}
         for caption_entry in self._captions_xml.findall(
                 self._CAPTIONS_ENTRY_TAG[caption_type]):
             caption_lang_code = caption_entry.attrib.get('lang_code')
-            if not caption_lang_code:
+            caption_name = caption_entry.attrib.get('name') or origin_lang_name
+            if not caption_lang_code or not caption_name:
+                self.report_warning(f'Missing necessary caption metadata. '
+                                    f'Need lang_code and name attributes. '
+                                    f'Found: {caption_entry.attrib}')
                 continue
             caption_format_data = []
             for caption_format in self._caption_formats_ext:
@@ -129,7 +134,7 @@ class GoogleDriveIE(InfoExtractor):
                     'lang': (caption_lang_code if origin_lang_code is None
                              else origin_lang_code),
                     'type': 'track',
-                    'name': '',
+                    'name': caption_name,
                     'kind': '',
                 }
                 if origin_lang_code is not None:
@@ -155,14 +160,15 @@ class GoogleDriveIE(InfoExtractor):
         self._download_subtitles_xml(video_id, subtitles_id, hl)
         if not self._captions_xml:
             return
-        track = self._captions_xml.find('track')
+        track = next((t for t in self._captions_xml.findall('track') if t.attrib.get('cantran') == 'true'), None)
         if track is None:
             return
         origin_lang_code = track.attrib.get('lang_code')
-        if not origin_lang_code:
+        origin_lang_name = track.attrib.get('name')
+        if not origin_lang_code or not origin_lang_name:
             return
         return self._get_captions_by_type(
-            video_id, subtitles_id, 'automatic_captions', origin_lang_code)
+            video_id, subtitles_id, 'automatic_captions', origin_lang_code, origin_lang_name)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -268,10 +274,8 @@ class GoogleDriveIE(InfoExtractor):
         subtitles_id = None
         ttsurl = get_value('ttsurl')
         if ttsurl:
-            # the video Id for subtitles will be the last value in the ttsurl
-            # query string
-            subtitles_id = ttsurl.encode().decode(
-                'unicode_escape').split('=')[-1]
+            # the subtitles ID is the vid param of the ttsurl query
+            subtitles_id = parse_qs(ttsurl).get('vid', [None])[-1]
 
         self.cookiejar.clear(domain='.google.com', path='/', name='NID')