summaryrefslogtreecommitdiffstats
path: root/subcmds/forall.py
blob: dbf26f0bad30ab1a5f6aa48e887e9ecdfa309514 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# -*- coding:utf-8 -*-
#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import errno
import multiprocessing
import re
import os
import signal
import sys
import subprocess

from color import Coloring
from command import Command, MirrorSafeCommand
import platform_utils

_CAN_COLOR = [
    'branch',
    'diff',
    'grep',
    'log',
]


class ForallColoring(Coloring):
  def __init__(self, config):
    Coloring.__init__(self, config, 'forall')
    self.project = self.printer('project', attr='bold')


class Forall(Command, MirrorSafeCommand):
  common = False
  helpSummary = "Run a shell command in each project"
  helpUsage = """
%prog [<project>...] -c <command> [<arg>...]
%prog -r str1 [str2] ... -c <command> [<arg>...]"
"""
  helpDescription = """
Executes the same shell command in each project.

The -r option allows running the command only on projects matching
regex or wildcard expression.

# Output Formatting

The -p option causes '%prog' to bind pipes to the command's stdin,
stdout and stderr streams, and pipe all output into a continuous
stream that is displayed in a single pager session.  Project headings
are inserted before the output of each command is displayed.  If the
command produces no output in a project, no heading is displayed.

The formatting convention used by -p is very suitable for some
types of searching, e.g. `repo forall -p -c git log -SFoo` will
print all commits that add or remove references to Foo.

The -v option causes '%prog' to display stderr messages if a
command produces output only on stderr.  Normally the -p option
causes command output to be suppressed until the command produces
at least one byte of output on stdout.

# Environment

pwd is the project's working directory.  If the current client is
a mirror client, then pwd is the Git repository.

REPO_PROJECT is set to the unique name of the project.

REPO_PATH is the path relative the the root of the client.

REPO_REMOTE is the name of the remote system from the manifest.

REPO_LREV is the name of the revision from the manifest, translated
to a local tracking branch.  If you need to pass the manifest
revision to a locally executed git command, use REPO_LREV.

REPO_RREV is the name of the revision from the manifest, exactly
as written in the manifest.

REPO_COUNT is the total number of projects being iterated.

REPO_I is the current (1-based) iteration count. Can be used in
conjunction with REPO_COUNT to add a simple progress indicator to your
command.

REPO__* are any extra environment variables, specified by the
"annotation" element under any project element.  This can be useful
for differentiating trees based on user-specific criteria, or simply
annotating tree details.

shell positional arguments ($1, $2, .., $#) are set to any arguments
following <command>.

Example: to list projects:

  %prog -c 'echo $REPO_PROJECT'

Notice that $REPO_PROJECT is quoted to ensure it is expanded in
the context of running <command> instead of in the calling shell.

Unless -p is used, stdin, stdout, stderr are inherited from the
terminal and are not redirected.

If -e is used, when a command exits unsuccessfully, '%prog' will abort
without iterating through the remaining projects.
"""

  def _Options(self, p):
    def cmd(option, opt_str, value, parser):
      setattr(parser.values, option.dest, list(parser.rargs))
      while parser.rargs:
        del parser.rargs[0]
    p.add_option('-r', '--regex',
                 dest='regex', action='store_true',
                 help="Execute the command only on projects matching regex or wildcard expression")
    p.add_option('-i', '--inverse-regex',
                 dest='inverse_regex', action='store_true',
                 help="Execute the command only on projects not matching regex or wildcard expression")
    p.add_option('-g', '--groups',
                 dest='groups',
                 help="Execute the command only on projects matching the specified groups")
    p.add_option('-c', '--command',
                 help='Command (and arguments) to execute',
                 dest='command',
                 action='callback',
                 callback=cmd)
    p.add_option('-e', '--abort-on-errors',
                 dest='abort_on_errors', action='store_true',
                 help='Abort if a command exits unsuccessfully')
    p.add_option('--ignore-missing', action='store_true',
                 help='Silently skip & do not exit non-zero due missing '
                      'checkouts')

    g = p.add_option_group('Output')
    g.add_option('-p',
                 dest='project_header', action='store_true',
                 help='Show project headers before output')
    g.add_option('-v', '--verbose',
                 dest='verbose', action='store_true',
                 help='Show command error messages')
    g.add_option('-j', '--jobs',
                 dest='jobs', action='store', type='int', default=1,
                 help='number of commands to execute simultaneously')

  def WantPager(self, opt):
    return opt.project_header and opt.jobs == 1

  def _SerializeProject(self, project):
    """ Serialize a project._GitGetByExec instance.

    project._GitGetByExec is not pickle-able. Instead of trying to pass it
    around between processes, make a dict ourselves containing only the
    attributes that we need.

    """
    if not self.manifest.IsMirror:
      lrev = project.GetRevisionId()
    else:
      lrev = None
    return {
        'name': project.name,
        'relpath': project.relpath,
        'remote_name': project.remote.name,
        'lrev': lrev,
        'rrev': project.revisionExpr,
        'annotations': dict((a.name, a.value) for a in project.annotations),
        'gitdir': project.gitdir,
        'worktree': project.worktree,
    }

  def ValidateOptions(self, opt, args):
    if not opt.command:
      self.Usage()

  def Execute(self, opt, args):
    cmd = [opt.command[0]]

    shell = True
    if re.compile(r'^[a-z0-9A-Z_/\.-]+$').match(cmd[0]):
      shell = False

    if shell:
      cmd.append(cmd[0])
    cmd.extend(opt.command[1:])

    if opt.project_header \
            and not shell \
            and cmd[0] == 'git':
      # If this is a direct git command that can enable colorized
      # output and the user prefers coloring, add --color into the
      # command line because we are going to wrap the command into
      # a pipe and git won't know coloring should activate.
      #
      for cn in cmd[1:]:
        if not cn.startswith('-'):
          break
      else:
        cn = None
      if cn and cn in _CAN_COLOR:
        class ColorCmd(Coloring):
          def __init__(self, config, cmd):
            Coloring.__init__(self, config, cmd)
        if ColorCmd(self.manifest.manifestProject.config, cn).is_on:
          cmd.insert(cmd.index(cn) + 1, '--color')

    mirror = self.manifest.IsMirror
    rc = 0

    smart_sync_manifest_name = "smart_sync_override.xml"
    smart_sync_manifest_path = os.path.join(
        self.manifest.manifestProject.worktree, smart_sync_manifest_name)

    if os.path.isfile(smart_sync_manifest_path):
      self.manifest.Override(smart_sync_manifest_path)

    if opt.regex:
      projects = self.FindProjects(args)
    elif opt.inverse_regex:
      projects = self.FindProjects(args, inverse=True)
    else:
      projects = self.GetProjects(args, groups=opt.groups)

    os.environ['REPO_COUNT'] = str(len(projects))

    pool = multiprocessing.Pool(opt.jobs, InitWorker)
    try:
      config = self.manifest.manifestProject.config
      results_it = pool.imap(
          DoWorkWrapper,
          self.ProjectArgs(projects, mirror, opt, cmd, shell, config))
      pool.close()
      for r in results_it:
        rc = rc or r
        if r != 0 and opt.abort_on_errors:
          raise Exception('Aborting due to previous error')
    except (KeyboardInterrupt, WorkerKeyboardInterrupt):
      # Catch KeyboardInterrupt raised inside and outside of workers
      print('Interrupted - terminating the pool')
      pool.terminate()
      rc = rc or errno.EINTR
    except Exception as e:
      # Catch any other exceptions raised
      print('Got an error, terminating the pool: %s: %s' %
            (type(e).__name__, e),
            file=sys.stderr)
      pool.terminate()
      rc = rc or getattr(e, 'errno', 1)
    finally:
      pool.join()
    if rc != 0:
      sys.exit(rc)

  def ProjectArgs(self, projects, mirror, opt, cmd, shell, config):
    for cnt, p in enumerate(projects):
      try:
        project = self._SerializeProject(p)
      except Exception as e:
        print('Project list error on project %s: %s: %s' %
              (p.name, type(e).__name__, e),
              file=sys.stderr)
        return
      except KeyboardInterrupt:
        print('Project list interrupted',
              file=sys.stderr)
        return
      yield [mirror, opt, cmd, shell, cnt, config, project]


class WorkerKeyboardInterrupt(Exception):
  """ Keyboard interrupt exception for worker processes. """
  pass


def InitWorker():
  signal.signal(signal.SIGINT, signal.SIG_IGN)


def DoWorkWrapper(args):
  """ A wrapper around the DoWork() method.

  Catch the KeyboardInterrupt exceptions here and re-raise them as a different,
  ``Exception``-based exception to stop it flooding the console with stacktraces
  and making the parent hang indefinitely.

  """
  project = args.pop()
  try:
    return DoWork(project, *args)
  except KeyboardInterrupt:
    print('%s: Worker interrupted' % project['name'])
    raise WorkerKeyboardInterrupt()


def DoWork(project, mirror, opt, cmd, shell, cnt, config):
  env = os.environ.copy()

  def setenv(name, val):
    if val is None:
      val = ''
    if hasattr(val, 'encode'):
      val = val.encode()
    env[name] = val

  setenv('REPO_PROJECT', project['name'])
  setenv('REPO_PATH', project['relpath'])
  setenv('REPO_REMOTE', project['remote_name'])
  setenv('REPO_LREV', project['lrev'])
  setenv('REPO_RREV', project['rrev'])
  setenv('REPO_I', str(cnt + 1))
  for name in project['annotations']:
    setenv("REPO__%s" % (name), project['annotations'][name])

  if mirror:
    setenv('GIT_DIR', project['gitdir'])
    cwd = project['gitdir']
  else:
    cwd = project['worktree']

  if not os.path.exists(cwd):
    # Allow the user to silently ignore missing checkouts so they can run on
    # partial checkouts (good for infra recovery tools).
    if opt.ignore_missing:
      return 0
    if ((opt.project_header and opt.verbose)
            or not opt.project_header):
      print('skipping %s/' % project['relpath'], file=sys.stderr)
    return 1

  if opt.project_header:
    stdin = subprocess.PIPE
    stdout = subprocess.PIPE
    stderr = subprocess.PIPE
  else:
    stdin = None
    stdout = None
    stderr = None

  p = subprocess.Popen(cmd,
                       cwd=cwd,
                       shell=shell,
                       env=env,
                       stdin=stdin,
                       stdout=stdout,
                       stderr=stderr)

  if opt.project_header:
    out = ForallColoring(config)
    out.redirect(sys.stdout)
    empty = True
    errbuf = ''

    p.stdin.close()
    s_in = platform_utils.FileDescriptorStreams.create()
    s_in.add(p.stdout, sys.stdout, 'stdout')
    s_in.add(p.stderr, sys.stderr, 'stderr')

    while not s_in.is_done:
      in_ready = s_in.select()
      for s in in_ready:
        buf = s.read().decode()
        if not buf:
          s.close()
          s_in.remove(s)
          continue

        if not opt.verbose:
          if s.std_name == 'stderr':
            errbuf += buf
            continue

        if empty and out:
          if not cnt == 0:
            out.nl()

          if mirror:
            project_header_path = project['name']
          else:
            project_header_path = project['relpath']
          out.project('project %s/', project_header_path)
          out.nl()
          out.flush()
          if errbuf:
            sys.stderr.write(errbuf)
            sys.stderr.flush()
            errbuf = ''
          empty = False

        s.dest.write(buf)
        s.dest.flush()

  r = p.wait()
  return r