vpdq 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vpdq might be problematic. Click here for more details.

Files changed (64) hide show
  1. {vpdq-0.2.1/python/vpdq.egg-info → vpdq-0.2.2}/PKG-INFO +1 -1
  2. {vpdq-0.2.1 → vpdq-0.2.2}/README.md +108 -55
  3. vpdq-0.2.2/cpp/CMakeLists.txt +99 -0
  4. vpdq-0.2.2/cpp/apps/CMakeLists.txt +9 -0
  5. vpdq-0.2.2/cpp/pdq/CMakeLists.txt +37 -0
  6. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/hashing/torben.cpp +5 -3
  7. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/hashing/torben.h +6 -8
  8. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/regtest.py +7 -2
  9. vpdq-0.2.2/cpp/vpdq/CMakeLists.txt +52 -0
  10. vpdq-0.2.2/cpp/vpdq/cpp/hashing/ffmpegutils.cpp +68 -0
  11. vpdq-0.2.2/cpp/vpdq/cpp/hashing/ffmpegutils.h +40 -0
  12. vpdq-0.2.2/cpp/vpdq/cpp/hashing/ffmpegwrapper.cpp +143 -0
  13. vpdq-0.2.2/cpp/vpdq/cpp/hashing/ffmpegwrapper.h +142 -0
  14. vpdq-0.2.2/cpp/vpdq/cpp/hashing/filehasher.cpp +253 -0
  15. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/filehasher.h +8 -9
  16. vpdq-0.2.2/cpp/vpdq/cpp/hashing/hasher.h +282 -0
  17. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/matchTwoHash.cpp +2 -2
  18. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/matchTwoHash.h +2 -2
  19. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/io/vpdqio.cpp +0 -1
  20. {vpdq-0.2.1 → vpdq-0.2.2}/python/tools/generate_hashes.py +2 -0
  21. {vpdq-0.2.1 → vpdq-0.2.2/python/vpdq.egg-info}/PKG-INFO +1 -1
  22. {vpdq-0.2.1 → vpdq-0.2.2}/python/vpdq.egg-info/SOURCES.txt +11 -3
  23. {vpdq-0.2.1 → vpdq-0.2.2}/setup.py +25 -13
  24. vpdq-0.2.2/version.txt +1 -0
  25. vpdq-0.2.1/cpp/CMakeLists.txt +0 -110
  26. vpdq-0.2.1/cpp/vpdq/cpp/hashing/filehasher.cpp +0 -548
  27. vpdq-0.2.1/version.txt +0 -1
  28. {vpdq-0.2.1 → vpdq-0.2.2}/LICENSE.txt +0 -0
  29. {vpdq-0.2.1 → vpdq-0.2.2}/MANIFEST.in +0 -0
  30. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/README.md +0 -0
  31. {vpdq-0.2.1/cpp/vpdq/cpp/bin → vpdq-0.2.2/cpp/apps}/match-hashes-brute.cpp +0 -0
  32. {vpdq-0.2.1/cpp/vpdq/cpp/bin → vpdq-0.2.2/cpp/apps}/match-hashes-byline.cpp +0 -0
  33. {vpdq-0.2.1/cpp/vpdq/cpp/bin → vpdq-0.2.2/cpp/apps}/vpdq-hash-video.cpp +0 -0
  34. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/README.md +0 -0
  35. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/common/pdqbasetypes.h +0 -0
  36. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/common/pdqhamming.cpp +0 -0
  37. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/common/pdqhamming.h +0 -0
  38. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/common/pdqhashtypes.cpp +0 -0
  39. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/common/pdqhashtypes.h +0 -0
  40. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/downscaling/downscaling.cpp +0 -0
  41. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/downscaling/downscaling.h +0 -0
  42. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/hashing/pdqhashing.cpp +0 -0
  43. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/hashing/pdqhashing.h +0 -0
  44. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/index/mih.h +0 -0
  45. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/io/hashio.cpp +0 -0
  46. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/io/hashio.h +0 -0
  47. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/io/pdqio.cpp +0 -0
  48. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/pdq/cpp/io/pdqio.h +0 -0
  49. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/bufferhasher.cpp +0 -0
  50. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/bufferhasher.h +0 -0
  51. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/hashing/vpdqHashType.h +0 -0
  52. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq/cpp/io/vpdqio.h +0 -0
  53. {vpdq-0.2.1 → vpdq-0.2.2}/cpp/vpdq_match.py +0 -0
  54. {vpdq-0.2.1 → vpdq-0.2.2}/pyproject.toml +0 -0
  55. {vpdq-0.2.1 → vpdq-0.2.2}/python/README.md +0 -0
  56. {vpdq-0.2.1 → vpdq-0.2.2}/python/__init__.py +0 -0
  57. {vpdq-0.2.1 → vpdq-0.2.2}/python/tests/test_util.py +0 -0
  58. {vpdq-0.2.1 → vpdq-0.2.2}/python/tests/test_vpdq_hash.py +0 -0
  59. {vpdq-0.2.1 → vpdq-0.2.2}/python/vpdq.egg-info/dependency_links.txt +0 -0
  60. {vpdq-0.2.1 → vpdq-0.2.2}/python/vpdq.egg-info/entry_points.txt +0 -0
  61. {vpdq-0.2.1 → vpdq-0.2.2}/python/vpdq.egg-info/top_level.txt +0 -0
  62. {vpdq-0.2.1 → vpdq-0.2.2}/python/vpdq.pyx +0 -0
  63. {vpdq-0.2.1 → vpdq-0.2.2}/requirements.txt +0 -0
  64. {vpdq-0.2.1 → vpdq-0.2.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vpdq
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Python bindings for Facebook VPDQ hash
5
5
  Author-email: Meta <threatexchange@meta.com>
6
6
  License: Copyright (c) 2017- Facebook
@@ -1,15 +1,22 @@
1
1
  # Summary
2
+
2
3
  vPDQ (Video PDQ) is a video-similarity-detection algorithm, which uses the PDQ image similarity algorithm on video frames to measure the similarity of videos.
3
4
  Full details of PDQ are located in the [hashing.pdf](https://github.com/facebook/ThreatExchange/blob/main/hashing/hashing.pdf) document.
4
5
  It allows for matching individual frames against known bad images, as well as which segments of a video are matching.
6
+
7
+ See [CPP implementation](#cpp-implementation) for how to install and use vpdq.
8
+
5
9
  ## Compared to TMK+PDQF
10
+
6
11
  Compared to TMK+PDQF (TMK), which also relies on the PDQ image hashing algorithm:
7
12
  TMK optimizes for identical videos (same length), vPDQ can match subsequences or clips within videos.
8
13
  TMK has a fixed-length hash, which simplifies matching lookup, and can be near constant time with the help of FAISS. vPDQ produces a variable length hash, and requires a linear comparison of candidates. This requires either an O(n*F<sub>c</sub>*F<sub>q</sub>) lookup where n is the number of videos being compared, and F<sub>c</sub> is the average number of frames per compared video and F<sub>q</sub> is the number of frames in the source video, or an initial filtering pass to reduce the candidates, which can potentially discard matching videos.
9
14
  Both TMK and vPDQ are backed by PDQ, and so inherit both PDQ’s strengths and weaknesses.
10
15
 
11
- # Description of Algorithm
12
- ## Producing a Hash
16
+ ## Description of Algorithm
17
+
18
+ ### Producing a Hash
19
+
13
20
  The algorithm for producing the “hash” is simple: given a video, convert it into a sequence of frame images at some interval (for example, 1 frame/second). For each frame image, use the PDQ hashing algorithm on each.
14
21
 
15
22
  We can annotate these hashes with their frame number, quality (0-100, which measures gradients/features, from least featureful to most featureful) and timestamp (sec). So for a 5 minute video at 1 frame/sec, we might have:
@@ -26,26 +33,28 @@ We can annotate these hashes with their frame number, quality(0-100 which measur
26
33
  For the matching algorithm, the frame numbers are not used, but they can still be useful for identifying matching segments when comparing videos.
27
34
 
28
35
  ### Pruning Frames
36
+
29
37
  Often, many frames are repeated in a video, or frames are very close to each other in PDQ distance. It is possible to reduce the number of frames in a hash by omitting subsequent frames that are within a distance D<sub>prune</sub> of the last retained frame.
30
38
 
31
39
  In the previous example, with D<sub>prune</sub> of 2 we might instead end up with:
32
40
  | Frame | PDQ Hash | Distance from last retained frame| Result |
33
41
  | ------------- | ------------- | ------------- |------------- |
34
- | 1 | face000... | N/A | Retain
35
- | 2 | face000... | 0 | Prune
36
- | 3 | face011... | 2 | Prune
37
- | 4 | face111... | 3 | Retain
38
- | 5 | face111... | 0 | Prune
42
+ | 1 | face000... | N/A | Retain |
43
+ | 2 | face000... | 0 | Prune |
44
+ | 3 | face011... | 2 | Prune |
45
+ | 4 | face111... | 3 | Retain |
46
+ | 5 | face111... | 0 | Prune |
39
47
  | ... | ... | ... | ... |
40
48
 
41
49
  Afterwards, what is left is:
42
- | Frame | PDQ Hash
50
+ | Frame | PDQ Hash |
43
51
  | ------------- | ------------- |
44
52
  | 1 | face000... |
45
53
  | 4 | face111... |
46
54
  | ... | ... |
47
55
 
48
- ## Comparison (Matching) Algorithm
56
+ ### Comparison (Matching) Algorithm
57
+
49
58
  There are four inputs to the comparison algorithm, which determines if two videos are considered similar by vPDQ:
50
59
 
51
60
  1. The query video’s frame PDQ hashes Q
@@ -62,7 +71,8 @@ There are four inputs to the comparison algorithm, which determines if two video
62
71
  - Using P<sub>c</sub> = 100% and P<sub>q</sub> = 100% will attempt to find only videos with the exact same frame content
63
72
 
64
73
  Here is the algorithm, in pseudocode:
65
- ```
74
+
75
+ ```python
66
76
  q_unique_frames = set(Q)
67
77
  c_unique_frames = set(C)
68
78
  q_unique_frames_matched_count = 0
@@ -89,11 +99,11 @@ is_match = c_pct_matched >= P_c and q_pct_matched >= P_q
89
99
 
90
100
  > **Note**: The frame number and the timestamp are not used at all in this comparison. The frames are treated as an unordered “bag of hashes”. The frame number and timestamp are included in each feature in the reference implementation in case of future expansion.
91
101
 
92
-
93
102
  ### Pruning Candidates
103
+
94
104
  When the number of potential candidates is high, the n*F<sub>c</sub>*F<sub>q</sub> algorithm might be too expensive to run. One potential solution for filtering is indexing frames from candidate videos into an index like FAISS, keyed to the video to compare. Our lookup algorithm then becomes:
95
105
 
96
- ```
106
+ ```python
97
107
  candidate_video_ids = set()
98
108
 
99
109
  for q_frame in Q:
@@ -112,22 +122,42 @@ for c_id in candidate_video_ids:
112
122
 
113
123
  Beyond pruning frames from candidates, it may be desirable to further prune to just sampled or key frames in candidate videos to control index size, but this may result in videos being incorrectly pruned.
114
124
 
115
- # CPP Implementation
116
- This implementation does not have Pruning Frames and Pruning Candidates.
125
+ ## CPP Implementation
126
+
127
+ The reference implementation for vpdq is written in C++. In addition, there are [Python bindings](#python-binding) to allow the use of vpdq from Python.
128
+
129
+ > **Note**: This implementation does not have Pruning Frames and Pruning Candidates.
130
+
131
+ The C++ implementation requires some external libraries to build.
132
+
133
+ Follow the [manual installation guide](#manual-installation) below for how to build vpdq. Alternatively, a [Dockerfile](../Dockerfile.vpdq) and devcontainer config are provided for convenience.
134
+
135
+ ## Docker Development
136
+
137
+ Docker can be used for development, preferably using a devcontainer with VSCode.
138
+
139
+ Build the Docker image:
140
+
141
+ ```sh
142
+ # ThreatExchange/
143
+ docker build -t vpdq . -f Dockerfile.vpdq
144
+ ```
145
+
146
+ After building the image, you can easily connect to it using the VSCode devcontainer extension. See [the VSCode devcontainer tutorial](https://code.visualstudio.com/docs/devcontainers/containers#_quick-start-open-an-existing-folder-in-a-container) for more information.
117
147
 
118
- ## Build Dependencies
148
+ Once you are in the container proceed to [**Building**](#building).
119
149
 
120
- * C++14
121
- * CMake
122
- * make
123
- * FFmpeg and libav* libraries
150
+ ## Manual Installation
124
151
 
125
- #### MacOS on Apple M1
152
+ ### Dependencies
126
153
 
127
- * Currently the builtin Apple clang g++ does not work for building this implementation
128
- * Installing GCC and updating the `CMake`s CXX to use that version of g++ instead is recommended
154
+ - C++14
155
+ - CMake
156
+ - pkg-config
157
+ - make
158
+ - FFmpeg and libav* libraries
129
159
 
130
- ## Install FFmpeg
160
+ ### Install FFmpeg
131
161
 
132
162
  [FFmpeg](https://ffmpeg.org/) and its [libav* libraries](https://trac.ffmpeg.org/wiki/Using%20libav*) must be installed before building.
133
163
 
@@ -140,6 +170,7 @@ macOS: `brew install ffmpeg`
140
170
  Windows MinGW/MSYS2: `pacman -S mingw-w64-x86_64-ffmpeg`
141
171
 
142
172
  To check if it's installed:
173
+
143
174
  ```sh
144
175
  $ ffmpeg
145
176
  ffmpeg version 4.4.2 Copyright (c) 2000-2023 the FFmpeg developers
@@ -148,21 +179,19 @@ ffmpeg version 4.4.2 Copyright (c) 2000-2023 the FFmpeg developers
148
179
 
149
180
  > **Note**: The actual version information displayed here may vary from one system to another; but if a message such as `ffmpeg: command not found` appears instead of the version information, FFmpeg is not properly installed.
150
181
 
182
+ ### Install libav*
151
183
 
152
- ## Install libav*
153
-
154
- Some package managers will install the libav* libraries bundled with FFmpeg.
155
-
156
- If they don't you will need to install them separately.
184
+ Some package managers will install the libav* libraries bundled with FFmpeg. But if yours does not then you will need to install them manually.
157
185
 
158
186
  Required:
159
- - libavdevice
160
- - libavfilter
161
- - libavformat
162
- - libavcodec
163
- - libswresample
164
- - libswscale
165
- - libavutil
187
+
188
+ - libavdevice
189
+ - libavfilter
190
+ - libavformat
191
+ - libavcodec
192
+ - libswresample
193
+ - libswscale
194
+ - libavutil
166
195
 
167
196
  Debian/Ubuntu:
168
197
 
@@ -170,36 +199,57 @@ Debian/Ubuntu:
170
199
  sudo apt-get install -y libavdevice-dev libavfilter-dev libavformat-dev libavcodec-dev libswresample-dev libswscale-dev libavutil-dev
171
200
  ```
172
201
 
202
+ All dependencies should now be installed. Proceed to [**Building**](#building).
203
+
173
204
  ## Building
174
205
 
175
- In vpdq/cpp:
206
+ Build using the usual CMake commands:
207
+
176
208
  ```sh
177
- mkdir build
178
- cd build
179
- cmake ..
180
- make
209
+ # vpdq/cpp
210
+ # Generate CMake project
211
+ cmake -S . -B build
212
+ # Build
213
+ cmake --build build -j
181
214
  ```
182
215
 
183
- This will produce 3 executable programs:
184
- - vpdq-hash-video
185
- - match-hashes-byline
186
- - match-hashes-brute
187
-
188
- Run the executables with `-h` or see below for usage information.
216
+ > **Note:** The CMake files will respect your `-DCMAKE_BUILD_TYPE` option.
217
+ >
218
+ > For example, to build with optimizations pass `-DCMAKE_BUILD_TYPE=Release` to the generator command (the first one above).
219
+ >
220
+ > To build with optimizations and debug info, pass `-DCMAKE_BUILD_TYPE=RelWithDebInfo`.
221
+ >
222
+ > There are also custom `Asan` and `Tsan` build types to compile with address/thread sanitizers (Linux only).
223
+ >
224
+ > See [CMAKE_BUILD_TYPE documentation](https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html) for more information.
225
+
226
+ This will build both the library and 3 CLI programs:
227
+
228
+ - vpdq-hash-video
229
+ - match-hashes-byline
230
+ - match-hashes-brute
231
+
232
+ The CLI programs will be found in `build/apps`.
233
+
234
+ The vpdq library will be located at `build/vpdq/libvpdqlib.a`.
235
+
236
+ Run the CLI programs with `-h` to see their usage information.
189
237
 
190
238
  ## Usage
191
239
 
240
+ Some Python scripts are used for testing the C++ implementation, but they do not require the Python binding to be installed. These scripts are located in the [cpp](./cpp) folder.
241
+
192
242
  This demo shows how to use `vpdq_match.py` to compare one target hash with all the queried hashes in the `sample-hashes`.
193
243
 
194
244
  The target hash must be generated with vpdq-hash-video before running.
195
245
 
196
- #### Brute-force matching
246
+ ### Brute-force matching
197
247
 
198
- In vpdq/cpp:
199
248
  ```sh
249
+ # vpdq/cpp
200
250
  python vpdq_match.py -f sample-hashes -i output-hashes/chair-19-sd-bar.txt
201
-
202
251
  ```
252
+
203
253
  Sample Output:
204
254
 
205
255
  ```sh
@@ -220,12 +270,13 @@ Matching Target ../ThreatExchange/vpdq/cpp/sampletest/chair-19-sd-bar.txt with .
220
270
  ---
221
271
 
222
272
  #### Regression Test
273
+
223
274
  An additional Python script, `regtest.py` can be used to test for changes in output during development.
224
275
 
225
276
  It hashes the provided sample videos and compares them with known good hashes from `sample-hashes` line by line.
226
277
 
227
- In vpdq/cpp:
228
278
  ```sh
279
+ # vpdq/cpp
229
280
  python regtest.py
230
281
 
231
282
  Matching File pattern-sd-with-small-logo-bar.txt
@@ -244,23 +295,25 @@ Matching File chair-22-with-small-logo-bar.txt
244
295
  100.000000 Percentage matches
245
296
  ```
246
297
 
247
- ## vPDQ Python Binding
248
- A Cython binding is available to the CPP library for linux and Mac users. All of the dependencies from the CPP implementation are required to build the binding.
298
+ ### Python Binding
249
299
 
250
- See [README.md in `python/`](./python/README.md) for more information.
300
+ A Cython binding to the C++ library is available for Linux and macOS.
301
+
302
+ All of the dependencies from the C++ implementation are required to build the binding.
251
303
 
252
304
  ```sh
253
305
  pip install vpdq
254
306
  ```
255
307
 
308
+ See [README.md in `python/`](./python/README.md) for more information.
256
309
 
257
310
  ## FAISS
311
+
258
312
  [FAISS](https://github.com/facebookresearch/faiss) has been successfully integrated with vPDQ in the [python-threatexchange](../python-threatexchange/threatexchange/extensions/vpdq) library. See the [README](../python-threatexchange/threatexchange/extensions/vpdq/README.md) for more information.
259
313
 
260
314
  ## Contact
261
315
 
262
- threatexchange@fb.com
263
-
316
+ threatexchange@meta.com
264
317
 
265
318
  ---
266
319
 
@@ -0,0 +1,99 @@
1
+ # Top level CMake for vpdq
2
+ # This will build vpdq, pdq, and the vpdq CLI programs.
3
+
4
+ cmake_minimum_required(VERSION 3.17)
5
+
6
+ project(vpdq LANGUAGES CXX)
7
+ set(CMAKE_CXX_STANDARD 14)
8
+
9
+ # Sanitizer build type options.
10
+ # This allows you to build with address/thread sanitizer by using
11
+ # -DCMAKE_BUILD_TYPE=Asan or Tsan on the generator.
12
+ # From https://stackoverflow.com/a/64294837
13
+ if(NOT MSVC)
14
+ get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
15
+
16
+ if(isMultiConfig)
17
+ if(NOT "Asan" IN_LIST CMAKE_CONFIGURATION_TYPES)
18
+ list(APPEND CMAKE_CONFIGURATION_TYPES Asan)
19
+ endif()
20
+ if(NOT "Tsan" IN_LIST CMAKE_CONFIGURATION_TYPES)
21
+ list(APPEND CMAKE_CONFIGURATION_TYPES Tsan)
22
+ endif()
23
+ else()
24
+ set(allowedBuildTypes Asan Tsan Debug Release RelWithDebInfo MinSizeRel)
25
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "${allowedBuildTypes}")
26
+
27
+ if(CMAKE_BUILD_TYPE AND NOT CMAKE_BUILD_TYPE IN_LIST allowedBuildTypes)
28
+ message(FATAL_ERROR "Invalid build type: ${CMAKE_BUILD_TYPE}")
29
+ endif()
30
+ endif()
31
+
32
+ # Asan
33
+ set(CMAKE_C_FLAGS_ASAN
34
+ "${CMAKE_C_FLAGS_DEBUG} -fsanitize=address,leak,undefined -fno-omit-frame-pointer" CACHE STRING
35
+ "Flags used by the C compiler for Asan build type or configuration." FORCE)
36
+
37
+ set(CMAKE_CXX_FLAGS_ASAN
38
+ "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address,leak,undefined -fno-omit-frame-pointer" CACHE STRING
39
+ "Flags used by the C++ compiler for Asan build type or configuration." FORCE)
40
+
41
+ set(CMAKE_EXE_LINKER_FLAGS_ASAN
42
+ "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=address,leak,undefined" CACHE STRING
43
+ "Linker flags to be used to create executables for Asan build type." FORCE)
44
+
45
+ set(CMAKE_SHARED_LINKER_FLAGS_ASAN
46
+ "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=address,leak,undefined" CACHE STRING
47
+ "Linker flags to be used to create shared libraries for Asan build type." FORCE)
48
+
49
+ # Tsan
50
+ set(CMAKE_C_FLAGS_TSAN
51
+ "${CMAKE_C_FLAGS_DEBUG} -fsanitize=thread,undefined -fno-omit-frame-pointer" CACHE STRING
52
+ "Flags used by the C compiler for Tsan build type or configuration." FORCE)
53
+
54
+ set(CMAKE_CXX_FLAGS_TSAN
55
+ "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=thread,undefined -fno-omit-frame-pointer" CACHE STRING
56
+ "Flags used by the C++ compiler for Tsan build type or configuration." FORCE)
57
+
58
+ set(CMAKE_EXE_LINKER_FLAGS_TSAN
59
+ "${CMAKE_EXE_LINKER_FLAGS_DEBUG} -fsanitize=thread,undefined" CACHE STRING
60
+ "Linker flags to be used to create executables for Tsan build type." FORCE)
61
+
62
+ set(CMAKE_SHARED_LINKER_FLAGS_TSAN
63
+ "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} -fsanitize=thread,undefined" CACHE STRING
64
+ "Linker flags to be used to create shared libraries for Tsan build type." FORCE)
65
+ endif()
66
+
67
+
68
+ # Find Threads for C++ multithreading
69
+ set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
70
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
71
+ find_package(Threads REQUIRED)
72
+
73
+ # Find libav* FFmpeg libraries using pkg-config
74
+ find_package(PkgConfig REQUIRED)
75
+ pkg_check_modules(LIBAV REQUIRED IMPORTED_TARGET
76
+ libavdevice
77
+ libavfilter
78
+ libavformat
79
+ libavcodec
80
+ libswresample
81
+ libswscale
82
+ libavutil
83
+ )
84
+
85
+ # pdq library
86
+ add_subdirectory(pdq)
87
+
88
+ # vpdq library
89
+ add_subdirectory(vpdq)
90
+
91
+ # CLI programs
92
+ # TODO: Make this a custom command/option. This isn't necessary to just build the library.
93
+ add_subdirectory(apps)
94
+
95
+ # Write the libav* library dirs to a new-line delimited file for Cython to be able to locate LIBAV files
96
+ # TODO: Make this a custom command or something. This isn't necessary on every run.
97
+ string(REPLACE ";" "\n" LIBRARY_DIRS "${LIBAV_STATIC_LIBRARY_DIRS}")
98
+ set(LIBRARY_DIRS_FILE "libraries-dirs.txt")
99
+ file(WRITE ${LIBRARY_DIRS_FILE} "${LIBRARY_DIRS}")
@@ -0,0 +1,9 @@
1
+ # vpdq CLI programs
2
+
3
+ add_executable(match-hashes-brute match-hashes-brute.cpp)
4
+ add_executable(match-hashes-byline match-hashes-byline.cpp)
5
+ add_executable(vpdq-hash-video vpdq-hash-video.cpp)
6
+
7
+ target_link_libraries(match-hashes-brute PRIVATE vpdqlib)
8
+ target_link_libraries(match-hashes-byline PRIVATE vpdqlib)
9
+ target_link_libraries(vpdq-hash-video PRIVATE vpdqlib)
@@ -0,0 +1,37 @@
1
+ # PDQ library
2
+ # This will produce one library file: libpdqlib
3
+
4
+ set(PDQSOURCES
5
+ cpp/common/pdqhashtypes.cpp
6
+ cpp/hashing/pdqhashing.cpp
7
+ cpp/common/pdqhamming.cpp
8
+ cpp/io/hashio.cpp
9
+ cpp/downscaling/downscaling.cpp
10
+ cpp/hashing/torben.cpp
11
+ )
12
+
13
+ set(PDQHEADERS
14
+ cpp/common/pdqhashtypes.h
15
+ cpp/common/pdqbasetypes.h
16
+ cpp/common/pdqhamming.h
17
+ cpp/hashing/pdqhashing.h
18
+ cpp/io/hashio.h
19
+ cpp/downscaling/downscaling.h
20
+ cpp/hashing/torben.h
21
+ )
22
+
23
+ # Note: Including header files here helps IDEs, but is not required.
24
+ add_library(pdqlib ${PDQHEADERS} ${PDQSOURCES})
25
+
26
+ # We need this directory, and users of the library will need it too.
27
+ target_include_directories(pdqlib PUBLIC
28
+ # We go up a directory so that the source files can include the
29
+ # whole path, e.g. <pdq/cpp/common/pdqbasetypes.h>
30
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
31
+ )
32
+
33
+ # Turn on -fPIC
34
+ set_target_properties(pdqlib PROPERTIES POSITION_INDEPENDENT_CODE ON)
35
+
36
+ # All users of this library will need at least C++11
37
+ target_compile_features(pdqlib PUBLIC cxx_std_11)
@@ -1,13 +1,15 @@
1
1
  // ================================================================
2
- // The following code is public domain.
3
- // Algorithm by Torben Mogensen, implementation by N. Devillard.
4
- // This code in public domain.
2
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
5
3
  // ================================================================
6
4
 
7
5
  namespace facebook {
8
6
  namespace pdq {
9
7
  namespace hashing {
10
8
 
9
+ /**
10
+ * The following code is public domain.
11
+ * Algorithm by Torben Mogensen, implementation by N. Devillard.
12
+ */
11
13
  float torben(float m[], int n) {
12
14
  int i, less, greater, equal;
13
15
  float min, max, guess, maxltguess, mingtguess;
@@ -1,20 +1,18 @@
1
1
  // ================================================================
2
- // The following code is public domain.
3
- // Algorithm by Torben Mogensen, implementation by N. Devillard.
4
- // This code in public domain.
2
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
5
3
  // ================================================================
6
4
 
7
5
  #ifndef TORBEN_H
8
6
  #define TORBEN_H
9
- /*
10
- * The following code is public domain.
11
- * Algorithm by Torben Mogensen, implementation by N. Devillard.
12
- * This code in public domain.
13
- */
14
7
 
15
8
  namespace facebook {
16
9
  namespace pdq {
17
10
  namespace hashing {
11
+
12
+ /**
13
+ * The following code is public domain.
14
+ * Algorithm by Torben Mogensen, implementation by N. Devillard.
15
+ */
18
16
  float torben(float m[], int n);
19
17
  } // namespace hashing
20
18
  } // namespace pdq
@@ -1,5 +1,6 @@
1
1
  # Copyright (c) Meta Platforms, Inc. and affiliates.
2
2
 
3
+ import os
3
4
  import subprocess
4
5
  import sys
5
6
  import argparse
@@ -14,7 +15,7 @@ import csv
14
15
  DIR = Path(__file__).parent
15
16
  VPDQ_DIR = DIR.parent
16
17
  SAMPLE_HASHES_DIR = VPDQ_DIR / "sample-hashes"
17
- EXEC_DIR = VPDQ_DIR / "cpp/build"
18
+ EXEC_DIR = VPDQ_DIR / "cpp/build/apps"
18
19
 
19
20
 
20
21
  def get_os() -> str:
@@ -130,7 +131,11 @@ def main():
130
131
 
131
132
  # Run the hashing and matching tests for single and multithreaded
132
133
  for thread_count in range(0, 2):
133
- print(f"Threads: {thread_count}")
134
+ if thread_count == 0:
135
+ num_cpu_cores = os.cpu_count()
136
+ print(f"Number of hashing threads: auto. Probably {num_cpu_cores} threads.")
137
+ else:
138
+ print(f"Number of hashing threads: {thread_count}")
134
139
  with TemporaryDirectory() as tempOutputHashFolder:
135
140
  tempOutputHashFolder = Path(tempOutputHashFolder)
136
141
 
@@ -0,0 +1,52 @@
1
+ # VPDQ library
2
+ # This will produce one library file: libvpdqlib
3
+
4
+ set(VPDQSOURCES
5
+ cpp/hashing/bufferhasher.cpp
6
+ cpp/hashing/filehasher.cpp
7
+ cpp/hashing/ffmpegutils.cpp
8
+ cpp/hashing/ffmpegwrapper.cpp
9
+ cpp/hashing/matchTwoHash.cpp
10
+ cpp/io/vpdqio.cpp
11
+ )
12
+
13
+ set(VPDQHEADERS
14
+ cpp/hashing/bufferhasher.h
15
+ cpp/hashing/filehasher.h
16
+ cpp/hashing/ffmpegutils.h
17
+ cpp/hashing/ffmpegwrapper.h
18
+ cpp/hashing/hasher.h
19
+ cpp/hashing/vpdqHashType.h
20
+ cpp/hashing/matchTwoHash.h
21
+ cpp/io/vpdqio.h
22
+ )
23
+
24
+ # Make an automatic library - will be static or dynamic based on user setting.
25
+ # Note: Including header files here helps IDEs, but is not required.
26
+ add_library(vpdqlib ${VPDQSOURCES} ${VPDQHEADERS})
27
+
28
+ # We need this directory, and users of the library will need it too.
29
+ target_include_directories(vpdqlib PUBLIC
30
+ # We go up a directory so that the source files can include the
31
+ # whole path, e.g. <vpdq/cpp/hashing/vpdqHashType.h>
32
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
33
+ )
34
+
35
+ target_link_libraries(vpdqlib PUBLIC PkgConfig::LIBAV Threads::Threads pdqlib)
36
+
37
+ # All users of this library will need at least C++14
38
+ target_compile_features(vpdqlib PUBLIC cxx_std_14)
39
+
40
+ # Turn on -fPIC
41
+ set_target_properties(vpdqlib PROPERTIES POSITION_INDEPENDENT_CODE ON)
42
+
43
+ if(NOT MSVC)
44
+ set(VPDQ_WARNING_FLAGS
45
+ -Wall
46
+ -Wextra
47
+ -Wpedantic
48
+ -Werror
49
+ -Wno-error=deprecated-declarations # Avoid compile error if something gets deprecated. Warning will still show up.
50
+ )
51
+ target_compile_options(vpdqlib PRIVATE ${VPDQ_WARNING_FLAGS})
52
+ endif()
@@ -0,0 +1,68 @@
1
+ // ================================================================
2
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ // ================================================================
4
+
5
+ #include <algorithm>
6
+ #include <cstdio>
7
+ #include <fstream>
8
+ #include <iostream>
9
+ #include <memory>
10
+ #include <string>
11
+
12
+ #include <vpdq/cpp/hashing/ffmpegutils.h>
13
+ #include <vpdq/cpp/hashing/ffmpegwrapper.h>
14
+
15
+ extern "C" {
16
+ #include <libavcodec/avcodec.h>
17
+ #include <libavformat/avformat.h>
18
+ #include <libavutil/frame.h>
19
+ #include <libavutil/imgutils.h>
20
+ #include <libavutil/log.h>
21
+ #include <libavutil/mem.h>
22
+ #include <libswscale/swscale.h>
23
+ }
24
+
25
+ namespace facebook {
26
+ namespace vpdq {
27
+ namespace hashing {
28
+ namespace ffmpeg {
29
+
30
+ void saveFrameToFile(AVFramePtr frame, const std::string& filename) {
31
+ if (!frame) {
32
+ throw std::invalid_argument("Cannot save frame to file. Frame is null.");
33
+ }
34
+
35
+ std::ofstream outfile(filename, std::ios::out | std::ios::binary);
36
+ if (!outfile) {
37
+ throw std::runtime_error("Cannot save frame to file " + filename);
38
+ }
39
+
40
+ for (int y = 0; y < frame->height; y++) {
41
+ outfile.write(
42
+ reinterpret_cast<const char*>(frame->data[0] + y * frame->linesize[0]),
43
+ frame->width * 3);
44
+ }
45
+ outfile.close();
46
+ }
47
+
48
+ AVFramePtr createRGB24Frame(size_t const width, size_t const height) {
49
+ AVFramePtr frame(av_frame_alloc());
50
+ if (frame.get() == nullptr) {
51
+ throw std::bad_alloc();
52
+ }
53
+
54
+ frame->format = PIXEL_FORMAT;
55
+ frame->width = width;
56
+ frame->height = height;
57
+
58
+ if (av_image_alloc(
59
+ frame->data, frame->linesize, width, height, PIXEL_FORMAT, 1) < 0) {
60
+ throw std::bad_alloc();
61
+ }
62
+ return frame;
63
+ }
64
+
65
+ } // namespace ffmpeg
66
+ } // namespace hashing
67
+ } // namespace vpdq
68
+ } // namespace facebook