diff mbox series

[FFmpeg-devel,v8,3/3] libavdevice/avfoundation.m: Allow to select devices by unique ID

Message ID 0A27B227-B693-4B6B-834F-ADC51464C4F6@rastageeks.org
State New
Headers show
Series [FFmpeg-devel,v8,1/3] libavdevice/avfoundation.m: use AudioConvert, extend supported formats | expand

Checks

Context Check Description
andriy/configurex86 warning Failed to apply patch
andriy/configureppc warning Failed to apply patch

Commit Message

Romain Beauxis Dec. 31, 2021, 5:45 p.m. UTC
Signed-off-by: Romain Beauxis <toots@rastageeks.org>
—
[Sorry for the noise but an issue came up with the previous set]

This is the third patch of a series of 3 that fix, clean up and enhance the
avfoundation implementation for libavdevice.

These patches come from an actual user-facing application relying on
libavdevice’s implementation of avfoundation audio input. Without them,
Avfoundation is practically unusable as it will:
* Refuse to process certain specific audio input formats that are actually
returned by the OS for some users (packed PCM audio)
* Drop audio frames, resulting in corrupted audio input. This might have gone
unnoticed with video frames but this makes avfoundation essentially unusable
for audio.

The patches are now being included in our production build, so they are tested
and usable in production.

Changes:
v2: None
v3:
  * Switched unique ID to use system-provided unique ID
  * Implemented unique IDs for screen capture
v4: Cleanup
v5: Fix indentation/wrapping
v6: None
v7: None
v8: None

This patch adds a unique ID to avfoundation devices. This is needed
because a device's index can change while the machine is running, when
devices are plugged or unplugged, and device names can be tricky to use
because they are localized.

Example of output:
./ffmpeg -f avfoundation -list_devices true -i ""
[...]
[AVFoundation indev @ 0x158705230] AVFoundation video devices:
[AVFoundation indev @ 0x158705230] [0] FaceTime HD Camera (ID: 47B4B64B70674B9CAD2BAE273A71F4B5)
[AVFoundation indev @ 0x158705230] [1] Capture screen 0 (ID: AvfilterAvfoundationCaptureScreen1)
[AVFoundation indev @ 0x158705230] AVFoundation audio devices:
[AVFoundation indev @ 0x158705230] [0] Loopback Audio (ID: com.rogueamoeba.Loopback.A5668B36-711E-4DF5-8A8D-7148508C735B)
[AVFoundation indev @ 0x158705230] [1] MacBook Pro Microphone (ID: BuiltInMicrophoneDevice)

Notes:
* Unique IDs do not seem to follow any specific pattern. I have used
one similar to the builtin microphone for screen capture.
* The : substitution is actually required, because : separates the video
and audio entries in the input filename. The loopback device above did
have it in its ID.

doc/indevs.texi            |  6 ++--
libavdevice/avfoundation.m | 72 +++++++++++++++++++++++++++++---------
2 files changed, 60 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/doc/indevs.texi b/doc/indevs.texi
index 9d8020311a..858c0fa4e4 100644
--- a/doc/indevs.texi
+++ b/doc/indevs.texi
@@ -114,7 +114,7 @@  The input filename has to be given in the following syntax:
-i "[[VIDEO]:[AUDIO]]"
@end example
The first entry selects the video input while the latter selects the audio input.
-The stream has to be specified by the device name or the device index as shown by the device list.
+The stream has to be specified by the device name, index or ID as shown by the device list.
Alternatively, the video and/or audio input device can be chosen by index using the
@option{
    -video_device_index <INDEX>
@@ -127,7 +127,9 @@  and/or
device name or index given in the input filename.

All available devices can be enumerated by using @option{-list_devices true}, listing
-all device names and corresponding indices.
+all device names, corresponding indices and IDs, when available. Device names can be
+tricky to use when localized, and device indices can change when devices are plugged or
+unplugged. A device ID, when available, uniquely identifies a device and should not change over time.

There are two device name aliases:
@table @code
diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
index 36f9fdc53d..d09a81cb3b 100644
--- a/libavdevice/avfoundation.m
+++ b/libavdevice/avfoundation.m
@@ -39,6 +39,8 @@ 
#include "libavutil/imgutils.h"
#include "avdevice.h"

+#define CLEANUP_DEVICE_ID(s) [[s stringByReplacingOccurrencesOfString:@":" withString:@"."] UTF8String]
+
#define av_log_avfoundation_error(s, str, err) \
   av_log(s, AV_LOG_ERROR, "Avfoundation: %s, %s\n", str, \
     [[[NSError errorWithDomain:NSOSStatusErrorDomain code:err userInfo:nil] localizedDescription] UTF8String] \
@@ -815,21 +817,23 @@  static int avf_read_header(AVFormatContext *s)
        int index = 0;
        av_log(ctx, AV_LOG_INFO, "AVFoundation video devices:\n");
        for (AVCaptureDevice *device in devices) {
-            const char *name = [[device localizedName] UTF8String];
-            index            = [devices indexOfObject:device];
-            av_log(ctx, AV_LOG_INFO, "[%d] %s\n", index, name);
+            const char *name     = [[device localizedName] UTF8String];
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            index                = [devices indexOfObject:device];
+            av_log(ctx, AV_LOG_INFO, "[%d] %s (ID: %s)\n", index, name, uniqueId);
        }
        for (AVCaptureDevice *device in devices_muxed) {
-            const char *name = [[device localizedName] UTF8String];
-            index            = [devices count] + [devices_muxed indexOfObject:device];
-            av_log(ctx, AV_LOG_INFO, "[%d] %s\n", index, name);
+            const char *name     = [[device localizedName] UTF8String];
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            index                = [devices count] + [devices_muxed indexOfObject:device];
+            av_log(ctx, AV_LOG_INFO, "[%d] %s (ID: %s)\n", index, name, uniqueId);
        }
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
        if (num_screens > 0) {
            CGDirectDisplayID screens[num_screens];
            CGGetActiveDisplayList(num_screens, screens, &num_screens);
            for (int i = 0; i < num_screens; i++) {
-                av_log(ctx, AV_LOG_INFO, "[%d] Capture screen %d\n", ctx->num_video_devices + i, i);
+                av_log(ctx, AV_LOG_INFO, "[%d] Capture screen %d (ID: AvfilterAvfoundationCaptureScreen%d)\n", ctx->num_video_devices + i, i, screens[i]);
            }
        }
#endif
@@ -837,9 +841,10 @@  static int avf_read_header(AVFormatContext *s)
        av_log(ctx, AV_LOG_INFO, "AVFoundation audio devices:\n");
        devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio];
        for (AVCaptureDevice *device in devices) {
-            const char *name = [[device localizedName] UTF8String];
-            int index  = [devices indexOfObject:device];
-            av_log(ctx, AV_LOG_INFO, "[%d] %s\n", index, name);
+            const char *name     = [[device localizedName] UTF8String];
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            int index            = [devices indexOfObject:device];
+            av_log(ctx, AV_LOG_INFO, "[%d] %s (ID: %s)\n", index, name, uniqueId);
        }
         goto fail;
    }
@@ -901,14 +906,29 @@  static int avf_read_header(AVFormatContext *s)
        } else {
        // looking for video inputs
        for (AVCaptureDevice *device in devices) {
-            if (!strncmp(ctx->video_filename, [[device localizedName] UTF8String], strlen(ctx->video_filename))) {
+            const char *name = [[device localizedName] UTF8String];
+            if (!strncmp(ctx->video_filename, name, strlen(ctx->video_filename))) {
+                video_device = device;
+                break;
+            }
+
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            if (!strncmp(ctx->video_filename, uniqueId, strlen(ctx->video_filename))) {
                video_device = device;
                break;
            }
        }
        // looking for muxed inputs
        for (AVCaptureDevice *device in devices_muxed) {
-            if (!strncmp(ctx->video_filename, [[device localizedName] UTF8String], strlen(ctx->video_filename))) {
+            const char *name = [[device localizedName] UTF8String];
+            if (!strncmp(ctx->video_filename, name, strlen(ctx->video_filename))) {
+                video_device = device;
+                ctx->video_is_muxed = 1;
+                break;
+            }
+
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            if (!strncmp(ctx->video_filename, uniqueId, strlen(ctx->video_filename))) {
                video_device = device;
                ctx->video_is_muxed = 1;
                break;
@@ -919,10 +939,23 @@  static int avf_read_header(AVFormatContext *s)
        // looking for screen inputs
        if (!video_device) {
            int idx;
+            CGDirectDisplayID screens[num_screens];
+            CGGetActiveDisplayList(num_screens, screens, &num_screens);
+            AVCaptureScreenInput* capture_screen_input = NULL;
+
            if(sscanf(ctx->video_filename, "Capture screen %d", &idx) && idx < num_screens) {
-                CGDirectDisplayID screens[num_screens];
-                CGGetActiveDisplayList(num_screens, screens, &num_screens);
-                AVCaptureScreenInput* capture_screen_input = [[[AVCaptureScreenInput alloc] initWithDisplayID:screens[idx]] autorelease];
+                capture_screen_input = [[[AVCaptureScreenInput alloc] initWithDisplayID:screens[idx]] autorelease];
+            }
+
+            if(sscanf(ctx->video_filename, "AvfilterAvfoundationCaptureScreen%d", &idx)) {
+                for (int i = 0; i < num_screens; i++) {
+                    if (screens[i] == idx) {
+                        capture_screen_input = [[[AVCaptureScreenInput alloc] initWithDisplayID:idx] autorelease];
+                    }
+                }
+            }
+
+            if (capture_screen_input) {
                video_device = (AVCaptureDevice*) capture_screen_input;
                ctx->video_device_index = ctx->num_video_devices + idx;
                ctx->video_is_screen = 1;
@@ -973,7 +1006,14 @@  static int avf_read_header(AVFormatContext *s)
        NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio];

        for (AVCaptureDevice *device in devices) {
-            if (!strncmp(ctx->audio_filename, [[device localizedName] UTF8String], strlen(ctx->audio_filename))) {
+            const char *name = [[device localizedName] UTF8String];
+            if (!strncmp(ctx->audio_filename, name, strlen(ctx->audio_filename))) {
+                audio_device = device;
+                break;
+            }
+
+            const char *uniqueId = CLEANUP_DEVICE_ID([device uniqueID]);
+            if (!strncmp(ctx->audio_filename, uniqueId, strlen(ctx->audio_filename))) {
                audio_device = device;
                break;
            }