feat(cli): --concurrency also controls the number of batch exiftool processes

pull/99/head
Romain 6 years ago
parent a5740fb94e
commit f7d7b827e5

@@ -67,6 +67,11 @@ const OPTIONS = {
type: 'boolean',
'default': false
},
'concurrency': {
description: 'Number of parallel parsing/processing operations',
type: 'number',
'default': os.cpus().length
},
// ------------------------------------
// Album options
@@ -191,12 +196,8 @@ const OPTIONS = {
group: 'Album options:',
description: 'How albums are named in <date> mode [moment.js pattern]',
'default': 'YYYY-MM'
},
'concurrency': {
description: 'Modify the concurrency of processing, if not set defaults to number of cores on system.',
type: 'number',
'default': os.cpus().length
}
}
// explicitly pass <process.argv> so we can unit test this logic
@@ -255,7 +256,7 @@ exports.get = (args) => {
usageStats: opts['usage-stats'],
log: opts['log'],
dryRun: opts['dry-run'],
concurrencyOpt: opts['concurrency']
concurrency: opts['concurrency']
}
}

@@ -15,6 +15,9 @@ stream.on('data', entry => console.log(`Processed ${entry.SourceFile}`))
stream.on('end', () => console.log('Finished'))
```
The number of parallel `exiftool` processes defaults to the CPU count.
It can be overridden by `exiftool.parse(root, files, count)`.
Each stream entry will be an object in the following format.
```js
@@ -54,10 +57,11 @@ Some notes on the structure:
- uses `exiftool` in batch mode, instead of spawning 1 instance per file
- runs 1 `exiftool` process per core to speed-up parsing
The following stats were captured while processing 10,000 photos stored on an SSD drive:
The following stats were captured while processing a large number of photos stored on an SSD drive:
| Metric | Value |
|--------|-------|
| Number of photos | 10,000 |
| Total time | 30 sec |
| Peak throughput | 300 photos / sec |

@@ -5,12 +5,12 @@ const exiftool = require('./stream.js')
const os = require('os')
/*
Fans out the list of files to multiple exiftool processes (= CPU count)
Fans out the list of files to multiple exiftool processes (default = CPU count)
Returns a single stream of javascript objects, parsed from the JSON response
*/
exports.parse = (rootFolder, filePaths) => {
exports.parse = (rootFolder, filePaths, concurrency) => {
// create several buckets of work
const workers = os.cpus().length
const workers = concurrency || os.cpus().length
const buckets = _.chunk(filePaths, Math.ceil(filePaths.length / workers))
debug(`Split files into ${buckets.length} batches for exiftool`)
// create several <exiftool> streams that can work in parallel

@@ -21,7 +21,7 @@ class Index {
/*
Index all the files in <media> and store into <database>
*/
update (mediaFolder) {
update (mediaFolder, concurrency) {
// will emit many different events
const emitter = new EventEmitter()
@@ -80,7 +80,7 @@ class Index {
// call <exiftool> on added and modified files
// and write each entry to the database
const stream = exiftool.parse(mediaFolder, toProcess)
const stream = exiftool.parse(mediaFolder, toProcess, concurrency)
stream.on('data', entry => {
const timestamp = moment(entry.File.FileModifyDate, EXIF_DATE_FORMAT).valueOf()
insertStatement.run(entry.SourceFile, timestamp, JSON.stringify(entry))

@@ -19,7 +19,7 @@ exports.run = function (opts, callback) {
return new Observable(observer => {
const picasaReader = new Picasa()
const index = new Index(path.join(opts.output, 'thumbsup.db'))
const emitter = index.update(opts.input)
const emitter = index.update(opts.input, opts.concurrency)
const files = []
emitter.on('stats', stats => {

@@ -10,7 +10,7 @@ exports.run = function (files, opts, parentTask) {
// wrap each job in a Listr task that returns a Promise
const tasks = jobs.map(job => listrTaskFromJob(job, opts.output))
const listr = new ListrWorkQueue(tasks, {
concurrent: opts.concurrencyOpt,
concurrent: opts.concurrency,
update: (done, total) => {
const progress = done === total ? '' : `(${done}/${total})`
parentTask.title = `Processing media ${progress}`

Loading…
Cancel
Save