feat(cli): --concurrency also controls the number of batch exiftool processes

pull/99/head
Romain 6 years ago
parent a5740fb94e
commit f7d7b827e5

@@ -67,6 +67,11 @@ const OPTIONS = {
type: 'boolean',
'default': false
},
'concurrency': {
description: 'Number of parallel parsing/processing operations',
type: 'number',
'default': os.cpus().length
},
// ------------------------------------
// Album options
@@ -191,12 +196,8 @@ const OPTIONS = {
group: 'Album options:',
description: 'How albums are named in <date> mode [moment.js pattern]',
'default': 'YYYY-MM'
},
'concurrency': {
description: 'Modify the concurrency of processing, if not set defaults to number of cores on system.',
type: 'number',
'default': os.cpus().length
}
}
// explicitly pass <process.argv> so we can unit test this logic
@@ -255,7 +256,7 @@ exports.get = (args) => {
usageStats: opts['usage-stats'],
log: opts['log'],
dryRun: opts['dry-run'],
concurrencyOpt: opts['concurrency']
concurrency: opts['concurrency']
}
}

@@ -15,6 +15,9 @@ stream.on('data', entry => console.log(`Processed ${entry.SourceFile}`))
stream.on('end', () => console.log('Finished'))
```
The number of parallel `exiftool` processes defaults to the CPU count.
It can be overridden by `exiftool.parse(root, files, count)`.
Each stream entry will be an object in the following format.
```js
@@ -54,10 +57,11 @@ Some notes on the structure:
- uses `exiftool` in batch mode, instead of spawning 1 instance per file
- runs 1 `exiftool` process per core to speed-up parsing
The following stats were captured while processing 10,000 photos stored on an SSD drive:
The following stats were captured while processing a large number of photos stored on an SSD drive:
| Metric | Value |
|--------|-------|
| Number of photos | 10,000 |
| Total time | 30 sec |
| Peak throughput | 300 photos / sec |

@@ -5,12 +5,12 @@ const exiftool = require('./stream.js')
const os = require('os')
/*
Fans out the list of files to multiple exiftool processes (= CPU count)
Fans out the list of files to multiple exiftool processes (default = CPU count)
Returns a single stream of javascript objects, parsed from the JSON response
*/
exports.parse = (rootFolder, filePaths) => {
exports.parse = (rootFolder, filePaths, concurrency) => {
// create several buckets of work
const workers = os.cpus().length
const workers = concurrency || os.cpus().length
const buckets = _.chunk(filePaths, Math.ceil(filePaths.length / workers))
debug(`Split files into ${buckets.length} batches for exiftool`)
// create several <exiftool> streams that can work in parallel

@@ -21,7 +21,7 @@ class Index {
/*
Index all the files in <media> and store into <database>
*/
update (mediaFolder) {
update (mediaFolder, concurrency) {
// will emit many different events
const emitter = new EventEmitter()
@@ -80,7 +80,7 @@ class Index {
// call <exiftool> on added and modified files
// and write each entry to the database
const stream = exiftool.parse(mediaFolder, toProcess)
const stream = exiftool.parse(mediaFolder, toProcess, concurrency)
stream.on('data', entry => {
const timestamp = moment(entry.File.FileModifyDate, EXIF_DATE_FORMAT).valueOf()
insertStatement.run(entry.SourceFile, timestamp, JSON.stringify(entry))

@@ -19,7 +19,7 @@ exports.run = function (opts, callback) {
return new Observable(observer => {
const picasaReader = new Picasa()
const index = new Index(path.join(opts.output, 'thumbsup.db'))
const emitter = index.update(opts.input)
const emitter = index.update(opts.input, opts.concurrency)
const files = []
emitter.on('stats', stats => {

@@ -10,7 +10,7 @@ exports.run = function (files, opts, parentTask) {
// wrap each job in a Listr task that returns a Promise
const tasks = jobs.map(job => listrTaskFromJob(job, opts.output))
const listr = new ListrWorkQueue(tasks, {
concurrent: opts.concurrencyOpt,
concurrent: opts.concurrency,
update: (done, total) => {
const progress = done === total ? '' : `(${done}/${total})`
parentTask.title = `Processing media ${progress}`

Loading…
Cancel
Save