diff mbox series

[FFmpeg-devel,3/3] directshow: add ability to read closed caption raw byte pairs from VBI pin

Message ID CAL1QdWfsug+Uwwnu3ZxFUT+4G+bQuApCyGrkhLxYp33=UVqvfg@mail.gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] closed caption decoder: accept and decode a new codec type of 'raw 608 byte pairs'
Related show

Commit Message

Roger Pack April 28, 2020, 6:29 a.m. UTC
Here's the final piece (to actually use the new codec), where we allow
reading raw CC 608 byte pairs from analog directshow TV capture devices
("read" in a loose sense of the term: it requires an intermediate dshow
filter to extract the raw CC bytes from the incoming VBI stream, but it
works).

Many thanks to grabien for sponsoring this effort, and to the
maintainers of the closed caption decoder and
everyone else who made it possible.

Signed-off-by: rogerdpack <rogerpack2005@gmail.com>
---
 doc/indevs.texi             | 15 ++++++-
 libavdevice/dshow.c         | 89 ++++++++++++++++++++++++++-----------
 libavdevice/dshow_capture.h | 16 ++++---
 libavdevice/dshow_pin.c     | 12 +++--
 4 files changed, 95 insertions(+), 37 deletions(-)
diff mbox series

Patch

diff --git a/doc/indevs.texi b/doc/indevs.texi
index 6f5afaf344..21d35d118a 100644
--- a/doc/indevs.texi
+++ b/doc/indevs.texi
@@ -452,8 +452,12 @@  The input name should be in the format:
 @var{TYPE}=@var{NAME}[:@var{TYPE}=@var{NAME}]
 @end example
 
-where @var{TYPE} can be either @var{audio} or @var{video},
-and @var{NAME} is the device's name or alternative name..
+where @var{TYPE} can be either @var{audio}, @var{video}, or @var{closed_caption},
+and @var{NAME} is the device's name or alternative name.
+
+@var{closed_caption} devices must advertise format VBI and have an intermediate
+directshow filter available to convert from VBI to raw EIA 608 closed caption
+format byte pairs.
 
 @subsection Options
 
@@ -615,6 +619,13 @@  Open video device @var{Camera} and audio device @var{Microphone}:
 $ ffmpeg -f dshow -i video="Camera":audio="Microphone"
 @end example
 
+@item
+Open video device @var{Camera}, closed caption device @var{Camera},
+and audio device @var{Microphone}:
+@example
+$ ffmpeg -f dshow -i video="Camera":audio="Microphone":closed_caption="Camera"
+@end example
+
 @item
 Print the list of supported options in selected device and exit:
 @example
diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index d7f5bd7069..18f8085db4 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c
@@ -90,24 +90,34 @@  dshow_read_close(AVFormatContext *s)
         libAVPin_Release(ctx->capture_pin[VideoDevice]);
     if (ctx->capture_pin[AudioDevice])
         libAVPin_Release(ctx->capture_pin[AudioDevice]);
+    if (ctx->capture_pin[ClosedCaptionDevice])
+        libAVPin_Release(ctx->capture_pin[ClosedCaptionDevice]);
     if (ctx->capture_filter[VideoDevice])
         libAVFilter_Release(ctx->capture_filter[VideoDevice]);
     if (ctx->capture_filter[AudioDevice])
         libAVFilter_Release(ctx->capture_filter[AudioDevice]);
+    if (ctx->capture_filter[ClosedCaptionDevice])
+        libAVFilter_Release(ctx->capture_filter[ClosedCaptionDevice]);
 
     if (ctx->device_pin[VideoDevice])
         IPin_Release(ctx->device_pin[VideoDevice]);
     if (ctx->device_pin[AudioDevice])
         IPin_Release(ctx->device_pin[AudioDevice]);
+    if (ctx->device_pin[ClosedCaptionDevice])
+        IPin_Release(ctx->device_pin[ClosedCaptionDevice]);
     if (ctx->device_filter[VideoDevice])
         IBaseFilter_Release(ctx->device_filter[VideoDevice]);
     if (ctx->device_filter[AudioDevice])
         IBaseFilter_Release(ctx->device_filter[AudioDevice]);
+    if (ctx->device_filter[ClosedCaptionDevice])
+        IBaseFilter_Release(ctx->device_filter[ClosedCaptionDevice]);
 
     av_freep(&ctx->device_name[0]);
     av_freep(&ctx->device_name[1]);
+    av_freep(&ctx->device_name[2]);
     av_freep(&ctx->device_unique_name[0]);
     av_freep(&ctx->device_unique_name[1]);
+    av_freep(&ctx->device_unique_name[2]);
 
     if(ctx->mutex)
         CloseHandle(ctx->mutex);
@@ -219,8 +229,8 @@  dshow_cycle_devices(AVFormatContext *avctx, ICreateDevEnum *devenum,
 
     const GUID *device_guid[2] = { &CLSID_VideoInputDeviceCategory,
                                    &CLSID_AudioInputDeviceCategory };
-    const char *devtypename = (devtype == VideoDevice) ? "video" : "audio only";
-    const char *sourcetypename = (sourcetype == VideoSourceDevice) ? "video" : "audio";
+    const char *devtypename = (devtype == VideoDevice) ? "video" : (devtype == AudioDevice) ? "audio" : "VBI";
+    const char *sourcetypename = (sourcetype == VideoSourceDevice) ? "video" : "audio only";
 
     r = ICreateDevEnum_CreateClassEnumerator(devenum, device_guid[sourcetype],
                                              (IEnumMoniker **) &classenum, 0);
@@ -569,9 +579,9 @@  dshow_cycle_pins(AVFormatContext *avctx, enum dshowDeviceType devtype,
     IPin *pin;
     int r;
 
-    const GUID *mediatype[2] = { &MEDIATYPE_Video, &MEDIATYPE_Audio };
-    const char *devtypename = (devtype == VideoDevice) ? "video" : "audio only";
-    const char *sourcetypename = (sourcetype == VideoSourceDevice) ? "video" : "audio";
+    const GUID *mediatype[3] = { &MEDIATYPE_Video, &MEDIATYPE_Audio, &MEDIATYPE_VBI };
+    const char *devtypename = (devtype == VideoDevice) ? "video" : (devtype == AudioDevice) ? "audio" : "VBI";
+    const char *sourcetypename = (sourcetype == VideoSourceDevice) ? "video" : "audio only";
 
     int set_format = (devtype == VideoDevice && (ctx->framerate ||
                                                 (ctx->requested_width && ctx->requested_height) ||
@@ -617,7 +627,7 @@  dshow_cycle_pins(AVFormatContext *avctx, enum dshowDeviceType devtype,
         if (IKsPropertySet_Get(p, &AMPROPSETID_Pin, AMPROPERTY_PIN_CATEGORY,
                                NULL, 0, &category, sizeof(GUID), &r2) != S_OK)
             goto next;
-        if (!IsEqualGUID(&category, &PIN_CATEGORY_CAPTURE))
+        if (!IsEqualGUID(&category, &PIN_CATEGORY_CAPTURE) && !IsEqualGUID(&category, &PIN_CATEGORY_VBI))
             goto next;
         name_buf = dup_wchar_to_utf8(info.achName);
 
@@ -739,10 +749,9 @@  dshow_open_device(AVFormatContext *avctx, ICreateDevEnum *devenum,
     IStream *ifile_stream = NULL;
     IStream *ofile_stream = NULL;
     IPersistStream *pers_stream = NULL;
-    enum dshowDeviceType otherDevType = (devtype == VideoDevice) ? AudioDevice : VideoDevice;
-
-    const wchar_t *filter_name[2] = { L"Audio capture filter", L"Video capture filter" };
+    enum dshowDeviceType all_device_types[3] = {VideoDevice, AudioDevice, ClosedCaptionDevice};
 
+    const wchar_t *filter_name[3] = { L"Audio ffmpeg capture filter", L"Video ffmpeg capture filter", L"VBI ffmpeg capture filter" };
 
     if ( ((ctx->audio_filter_load_file) && (strlen(ctx->audio_filter_load_file)>0) && (sourcetype == AudioSourceDevice)) ||
             ((ctx->video_filter_load_file) && (strlen(ctx->video_filter_load_file)>0) && (sourcetype == VideoSourceDevice)) ) {
@@ -778,16 +787,23 @@  dshow_open_device(AVFormatContext *avctx, ICreateDevEnum *devenum,
             goto error;
         }
     }
-        if (ctx->device_filter[otherDevType]) {
-        // avoid adding add two instances of the same device to the graph, one for video, one for audio
-        // a few devices don't support this (could also do this check earlier to avoid double crossbars, etc. but they seem OK)
-        if (strcmp(device_filter_unique_name, ctx->device_unique_name[otherDevType]) == 0) {
-          av_log(avctx, AV_LOG_DEBUG, "reusing previous graph capture filter... %s\n", device_filter_unique_name);
-          IBaseFilter_Release(device_filter);
-          device_filter = ctx->device_filter[otherDevType];
-          IBaseFilter_AddRef(ctx->device_filter[otherDevType]);
-        } else {
-            av_log(avctx, AV_LOG_DEBUG, "not reusing previous graph capture filter %s != %s\n", device_filter_unique_name, ctx->device_unique_name[otherDevType]);
+    for (int i = 0; i < sizeof(all_device_types) / sizeof(all_device_types[0]); i++) {
+        enum dshowDeviceType candidate = all_device_types[i];
+        if (candidate == devtype) {
+            continue; // skip our own device type; it has not been added yet
+        }
+        if (ctx->device_filter[candidate]) {
+            // avoid adding two instances of the same device to the graph, one for video, one for audio
+            // a few devices don't support this (could also do this check earlier to avoid double crossbars, etc. but they seem OK)
+            if (strcmp(device_filter_unique_name, ctx->device_unique_name[candidate]) == 0) {
+                av_log(avctx, AV_LOG_DEBUG, "reusing previous graph capture filter... %s\n", device_filter_unique_name);
+                IBaseFilter_Release(device_filter);
+                device_filter = ctx->device_filter[candidate];
+                IBaseFilter_AddRef(ctx->device_filter[candidate]);
+            } else {
+                av_log(avctx, AV_LOG_DEBUG, "not reusing previous graph capture filter %s != %s\n", device_filter_unique_name, ctx->device_unique_name[candidate]);
+            }
+            break;
         }
     }
 
@@ -1011,7 +1027,7 @@  dshow_add_device(AVFormatContext *avctx,
                 }
             }
         }
-    } else {
+    } else if (devtype == AudioDevice) {
         WAVEFORMATEX *fx = NULL;
 
         if (IsEqualGUID(&type.formattype, &FORMAT_WaveFormatEx)) {
@@ -1027,6 +1043,10 @@  dshow_add_device(AVFormatContext *avctx,
         par->codec_id    = waveform_codec_id(par->format);
         par->sample_rate = fx->nSamplesPerSec;
         par->channels    = fx->nChannels;
+    } else {
+        // closed captions
+        par->codec_type  = AVMEDIA_TYPE_SUBTITLE;
+        par->codec_id    = AV_CODEC_ID_EIA_608_RAW_BYTE_PAIRS;
     }
 
     avpriv_set_pts_info(st, 64, 1, 10000000);
@@ -1052,24 +1072,29 @@  static int parse_device_name(AVFormatContext *avctx)
         char *token = strtok(NULL, ":");
         tmp = NULL;
 
-        if        (!strcmp(type, "video")) {
+        if (!strcmp(type, "video")) {
             device_name[0] = token;
         } else if (!strcmp(type, "audio")) {
             device_name[1] = token;
+        } else if (!strcmp(type, "closed_caption")) {
+            device_name[2] = token;
         } else {
             device_name[0] = NULL;
             device_name[1] = NULL;
+            device_name[2] = NULL;
             break;
         }
     }
 
-    if (!device_name[0] && !device_name[1]) {
+    if (!device_name[0] && !device_name[1] && !device_name[2]) {
         ret = 0;
     } else {
         if (device_name[0])
             device_name[0] = av_strdup(device_name[0]);
         if (device_name[1])
             device_name[1] = av_strdup(device_name[1]);
+        if (device_name[2])
+            device_name[2] = av_strdup(device_name[2]);
     }
 
     av_free(name);
@@ -1129,9 +1154,9 @@  static int dshow_read_header(AVFormatContext *avctx)
     }
 
     if (ctx->list_devices) {
-        av_log(avctx, AV_LOG_INFO, "DirectShow video devices (some may be both video and audio devices)\n");
+        av_log(avctx, AV_LOG_INFO, "DirectShow video devices (some may be both video and audio or VBI devices)\n"); // VBI seems to only be on video devices
         dshow_cycle_devices(avctx, devenum, VideoDevice, VideoSourceDevice, NULL, NULL);
-        av_log(avctx, AV_LOG_INFO, "DirectShow audio devices\n");
+        av_log(avctx, AV_LOG_INFO, "DirectShow audio [only] devices\n");
         dshow_cycle_devices(avctx, devenum, AudioDevice, AudioSourceDevice, NULL, NULL);
         ret = AVERROR_EXIT;
         goto error;
@@ -1151,6 +1176,11 @@  static int dshow_read_header(AVFormatContext *avctx)
                 }
             }
         }
+        if (ctx->device_name[ClosedCaptionDevice])
+            if ((r = dshow_list_device_options(avctx, devenum, ClosedCaptionDevice, VideoSourceDevice))) {
+                ret = r;
+                goto error;
+            }
     }
     if (ctx->device_name[VideoDevice]) {
         if ((r = dshow_open_device(avctx, devenum, VideoDevice, VideoSourceDevice)) < 0 ||
@@ -1171,6 +1201,13 @@  static int dshow_read_header(AVFormatContext *avctx)
             }
         }
     }
+    if (ctx->device_name[ClosedCaptionDevice]) {
+        if ((r = dshow_open_device(avctx, devenum, ClosedCaptionDevice, VideoSourceDevice)) < 0 ||
+            (r = dshow_add_device(avctx, ClosedCaptionDevice)) < 0) {
+            ret = r;
+            goto error;
+        }
+    }
     if (ctx->list_options) {
         /* allow it to list crossbar options in dshow_open_device */
         ret = AVERROR_EXIT;
@@ -1285,7 +1322,9 @@  static int dshow_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
     }
-
+    if (pkt) {
+        av_log(ctx, AV_LOG_DEBUG, "dshow passing to av pipeline packet of %8d\n", pkt->size);
+    }
     return ctx->eof ? AVERROR(EIO) : pkt->size;
 }
 
diff --git a/libavdevice/dshow_capture.h b/libavdevice/dshow_capture.h
index 475d62ba99..d4aa5f809e 100644
--- a/libavdevice/dshow_capture.h
+++ b/libavdevice/dshow_capture.h
@@ -61,8 +61,10 @@  struct GUIDoffset {
 enum dshowDeviceType {
     VideoDevice = 0,
     AudioDevice = 1,
+    ClosedCaptionDevice = 2,
 };
 
+// We sometimes want "audio from a video source device" so differentiate this way:
 enum dshowSourceFilterType {
     VideoSourceDevice = 0,
     AudioSourceDevice = 1,
@@ -288,8 +290,8 @@  struct dshow_ctx {
 
     IGraphBuilder *graph;
 
-    char *device_name[2];
-    char *device_unique_name[2];
+    char *device_name[3];
+    char *device_unique_name[3];
 
     int video_device_number;
     int audio_device_number;
@@ -312,10 +314,10 @@  struct dshow_ctx {
     char *video_filter_load_file;
     char *video_filter_save_file;
 
-    IBaseFilter *device_filter[2];
-    IPin        *device_pin[2];
-    libAVFilter *capture_filter[2];
-    libAVPin    *capture_pin[2];
+    IBaseFilter *device_filter[3];
+    IPin        *device_pin[3];
+    libAVFilter *capture_filter[3];
+    libAVPin    *capture_pin[3];
 
     HANDLE mutex;
     HANDLE event[2]; /* event[0] is set by DirectShow
@@ -324,7 +326,7 @@  struct dshow_ctx {
 
     int eof;
 
-    int64_t curbufsize[2];
+    int64_t curbufsize[3];
     unsigned int video_frame_num;
 
     IMediaControl *control;
diff --git a/libavdevice/dshow_pin.c b/libavdevice/dshow_pin.c
index 53b1c9150d..f1bf93935a 100644
--- a/libavdevice/dshow_pin.c
+++ b/libavdevice/dshow_pin.c
@@ -52,9 +52,15 @@  libAVPin_ReceiveConnection(libAVPin *this, IPin *pin,
     if (devtype == VideoDevice) {
         if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Video))
             return VFW_E_TYPE_NOT_ACCEPTED;
-    } else {
+    } else if (devtype == AudioDevice) {
         if (!IsEqualGUID(&type->majortype, &MEDIATYPE_Audio))
             return VFW_E_TYPE_NOT_ACCEPTED;
+    } else {
+        if (IsEqualGUID(&type->majortype, &MEDIATYPE_AUXLine21Data) && IsEqualGUID(&type->subtype, &MEDIASUBTYPE_Line21_BytePair)) {
+            dshowdebug("accepting VBI RAW 608 input\n");
+        } else {
+            return VFW_E_TYPE_NOT_ACCEPTED;
+        }
     }
 
     IPin_AddRef(pin);
@@ -322,7 +328,7 @@  libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
     int64_t curtime;
     int64_t orig_curtime;
     int64_t graphtime;
-    const char *devtypename = (devtype == VideoDevice) ? "video" : "audio";
+    const char *devtypename = (devtype == VideoDevice) ? "video" : (devtype == AudioDevice) ? "audio" : "VBI";
     IReferenceClock *clock = pin->filter->clock;
     int64_t dummy;
     struct dshow_ctx *ctx;
@@ -359,7 +365,7 @@  libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
     ctx = s->priv_data;
     index = pin->filter->stream_index;
 
-    av_log(NULL, AV_LOG_VERBOSE, "dshow passing through packet of type %s size %8d "
+    av_log(NULL, AV_LOG_VERBOSE, "dshow captured packet of type %s size %8d "
         "timestamp %"PRId64" orig timestamp %"PRId64" graph timestamp %"PRId64" diff %"PRId64" %s\n",
         devtypename, buf_size, curtime, orig_curtime, graphtime, graphtime - orig_curtime, ctx->device_name[devtype]);
     pin->filter->callback(priv_data, index, buf, buf_size, curtime, devtype);
-- 
2.17.1