VisIt/build_Juqueen: LaunchService_socket-bridge-host.C

File LaunchService_socket-bridge-host.C, 15.6 KB (added by Jens Henrik Goebbert, 9 years ago)
Line 
1/*****************************************************************************
2*
3* Copyright (c) 2000 - 2015, Lawrence Livermore National Security, LLC
4* Produced at the Lawrence Livermore National Laboratory
5* LLNL-CODE-442911
6* All rights reserved.
7*
8* This file is part of VisIt. For details, see https://visit.llnl.gov/. The
9* full copyright notice is contained in the file COPYRIGHT located at the root
10* of the VisIt distribution or at http://www.llnl.gov/visit/copyright.html.
11*
12* Redistribution and use in source and binary forms, with or without
13* modification, are permitted provided that the following conditions are met:
14*
15* - Redistributions of source code must retain the above copyright notice,
16* this list of conditions and the disclaimer below.
17* - Redistributions in binary form must reproduce the above copyright notice,
18* this list of conditions and the disclaimer (as noted below) in the
19* documentation and/or other materials provided with the distribution.
20* - Neither the name of the LLNS/LLNL nor the names of its contributors may
21* be used to endorse or promote products derived from this software without
22* specific prior written permission.
23*
24* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27* ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY,
28* LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY
29* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
35* DAMAGE.
36*
37*****************************************************************************/
38#include <LaunchService.h>
39
40#include <cstdio>
41#include <cstdlib>
42#include <cstring>
43#include <ctime>
44
45#if defined(_WIN32)
46#include <process.h>
47#include <winsock2.h>
48#include <windows.h>
49#else
50#include <unistd.h> // alarm
51#include <signal.h>
52#include <sys/types.h>
53#include <sys/wait.h>
54#endif
55
56#include <DebugStream.h>
57#include <FileFunctions.h>
58#include <ParentProcess.h>
59#include <VisItException.h>
60#include <SocketBridge.h>
61#include <SocketConnection.h>
62
// Child output capture is enabled on every platform except Windows
// (i.e. on UNIX and Mac builds).
#ifndef _WIN32
#define CAPTURE_CHILD_OUTPUT
#endif
67
68//
69// Static member variables.
70//
71std::map<int, bool> LaunchService::childDied;
72
// Settings handed to CreateSocketBridge (through a void pointer) so it can
// construct and configure the socket bridge.
struct BridgeInfo
{
    int  newlocalport;  // first port argument given to the SocketBridge
    int  oldlocalport;  // second port argument given to the SocketBridge
    int  bufferSize;    // applied to the bridge only when greater than zero
    bool logging;       // forwarded to SocketBridge::SetLogging
};
81
82// ****************************************************************************
83// Function: CreateSocketBridge
84//
85// Purpose:
86// Initiate the socket bridge.
87//
88// Arguments:
89// ports (really an int[2]):
90// ports[0]=new local port
91// ports[1]=old local port
92//
93// Programmer: Jeremy Meredith
94// Creation: June 5, 2007
95//
96// Modifications:
97// Thomas R. Treadway, Mon Oct 8 13:27:42 PDT 2007
98// Backing out SSH tunneling on Panther (MacOS X 10.3)
99//
100// Brad Whitlock, Tue Oct 14 17:12:46 PDT 2014
101// Pass the bridge set up information in a struct. Set the buffer size.
102//
103// ****************************************************************************
104static void CreateSocketBridge(void *ptr)
105{
106 BridgeInfo *info = (BridgeInfo *)ptr;
107 SocketBridge bridge(info->newlocalport, info->oldlocalport);
108 bridge.SetLogging(info->logging);
109 if(info->bufferSize > 0)
110 bridge.SetBufferSize(info->bufferSize);
111 bridge.Bridge();
112}
113
114// ****************************************************************************
115// Method: LaunchService::LaunchService
116//
117// Purpose:
118// Constructor.
119//
120// Programmer: Brad Whitlock
121// Creation: Mon Nov 28 17:11:19 PST 2011
122//
123// Modifications:
124//
125// ****************************************************************************
126
127LaunchService::LaunchService()
128{
129}
130
131// ****************************************************************************
132// Method: LaunchService::~LaunchService
133//
134// Purpose:
135// Destructor.
136//
137// Programmer: Brad Whitlock
138// Creation: Mon Nov 28 17:11:19 PST 2011
139//
140// Modifications:
141//
142// ****************************************************************************
143
144LaunchService::~LaunchService()
145{
146}
147
148// ****************************************************************************
149// Method: LaunchService::DeadChildHandler
150//
151// Purpose:
152// Signal handler for a SIGCHLD even while waiting for remote connections.
153// Catch a child that died and mark it's success or failure in the
154// childDied array.
155//
156// Programmer: Brad Whitlock
157// Creation: Mon May 5 11:18:55 PDT 2003
158//
159// Modifications:
160//
161// ****************************************************************************
162
163void
164LaunchService::DeadChildHandler(int)
165{
166#if !defined(_WIN32)
167 int status;
168 int pid;
169 pid = wait(&status);
170
171 childDied[pid] = (status == 0 ? false : true);
172
173 signal(SIGCHLD, DeadChildHandler);
174#endif
175}
176
177// ****************************************************************************
178// Method: LaunchService::SetupGatewaySocketBridgeIfNeeded
179//
180// Purpose:
181// If SSH tunneling is enabled and we're about to launch a parallel
182// engine, we need to set up a local port from any incoming host
183// that gets forwarded through the appropriate SSH tunnel. We cannot
184// access SSH tunnels at the login node for a cluster from the
185// compute nodes, because by default SSH only listens for connections
186// from localhost.
187//
188// The launch arguments containing the login node forward ("localhost":port)
189// are also converted to the new bridge (loginnode:newport);
190//
191// Arguments:
192// launchArgs the launch arguments (these will be modified in-place!)
193//
194// Programmer: Jeremy Meredith
195// Creation: May 24, 2007
196//
197// Modifications:
198// Thomas R. Treadway, Mon Oct 8 13:27:42 PDT 2007
199// Backing out SSH tunneling on Panther (MacOS X 10.3)
200//
201// Brad Whitlock, Mon Apr 27 16:31:23 PST 2009
202// I changed the routine so the check for setting up the bridge is passed
203// in rather than calculated in here from launch arguments.
204//
205// Brad Whitlock, Tue Oct 14 17:16:31 PDT 2014
206// Allow the buffer size to be passed in to the thread that makes the bridge.
207//
208// ****************************************************************************
209
210bool
211LaunchService::SetupGatewaySocketBridgeIfNeeded(stringVector &launchArgs)
212{
213 const char *mName="LaunchService::SetupGatewaySocketBridgeIfNeeded: ";
214
215 // Get the port and host.
216 int oldlocalport = -1;
217 int portargument = -1;
218 int hostargument = -1;
219 int bufferSize = -1;
220 std::string sbHost;
221 for (size_t i=0; i<launchArgs.size(); i++)
222 {
223 if (i<launchArgs.size()-1 && launchArgs[i] == "-port")
224 {
225 oldlocalport = atoi(launchArgs[i+1].c_str());
226 portargument = i+1;
227 }
228 else if (i<launchArgs.size()-1 && launchArgs[i] == "-host")
229 {
230 hostargument = i+1;
231 }
232 else if (launchArgs[i] == "-fixed-buffer-sockets")
233 {
234 bufferSize = SocketConnection::FIXED_BUFFER_SIZE;
235 }
236 else if((i+1)<launchArgs.size() && launchArgs[i] == "-socket-bridge-host")
237 {
238 sbHost = launchArgs[i+1];
239 }
240 }
241
242 bool setupBridge = (portargument != -1 && hostargument != -1);
243 if(setupBridge)
244 {
245 debug5 << mName << "Setting up gateway port bridge.\n";
246 // find a new local port
247 int lowerRemotePort = 10000;
248 int upperRemotePort = 40000;
249 int remotePortRange = 1+upperRemotePort-lowerRemotePort;
250
251#if defined(_WIN32)
252 srand((unsigned)time(0));
253 int newlocalport = lowerRemotePort+(rand()%remotePortRange);
254#else
255 srand48(long(time(0)));
256 int newlocalport = lowerRemotePort+(lrand48()%remotePortRange);
257#endif
258 debug5 << mName << "Bridging new port INADDR_ANY/" << newlocalport
259 << " to tunneled port localhost/" << oldlocalport << endl;
260
261 // replace the host with my host name
262 if(sbHost.empty()) {
263 char hostname[1024];
264 gethostname(hostname,1024);
265 launchArgs[hostargument] = hostname;
266 }
267 else
268 launchArgs[hostargument] = sbHost;
269
270 // replace the launch argument port number
271 char newportstr[10];
272 sprintf(newportstr,"%d",newlocalport);
273 launchArgs[portargument] = newportstr;
274
275 // fork and start the socket bridge
276 BridgeInfo *info = new BridgeInfo;
277 info->newlocalport = newlocalport;
278 info->oldlocalport = oldlocalport;
279 info->bufferSize = bufferSize;
280 info->logging = DebugStream::Level1();
281#ifdef _WIN32
282 _beginthread(CreateSocketBridge, 0, (void*)info);
283#else
284 switch (fork())
285 {
286 case -1:
287 // Could not fork.
288 exit(-1); // HOOKS_IGNORE
289 break;
290 case 0:
291 {
292 // The child process will start the bridge
293 // Close stdin and any other file descriptors.
294 fclose(stdin);
295 for (int k = 3 ; k < 32 ; ++k)
296 {
297 close(k);
298 }
299 CreateSocketBridge((void*)info);
300 exit(0); // HOOKS_IGNORE
301 break;
302 }
303 default:
304 // Parent process continues on as normal
305 // Caution: there is a slight race condition here, though
306 // it would require the engine to launch and try to connect
307 // back before the child process got the bridge set up.
308 // The odds of this happening are low, but it should be fixed.
309 break;
310 }
311#endif
312 }
313 else
314 {
315 debug5 << mName << "Required -host or -port argument not found" << endl;
316 }
317
318 return setupBridge;
319}
320
321// ****************************************************************************
322// Method: LaunchService::LaunchProcess
323//
324// Purpose:
325// Launch a process, optionally reading its output.
326//
327// Arguments:
328//
329// Returns:
330//
331// Note:
332//
333// Programmer: Brad Whitlock
334// Creation: Mon Nov 28 17:07:30 PST 2011
335//
336// Modifications:
337//
338// ****************************************************************************
339
340void
341LaunchService::Launch(const stringVector &origLaunchArgs, bool doBridge,
342 SocketConnection **conn)
343{
344 const char *mName = "LaunchService::LaunchProcess: ";
345 debug1 << mName << "start" << endl;
346
347 stringVector launchArgs(origLaunchArgs);
348 bool readOutput = conn != NULL;
349 if(readOutput)
350 *conn = NULL;
351
352 if(launchArgs.empty())
353 return;
354
355 // Set up a socket bridge if we need one.
356 if(doBridge)
357 SetupGatewaySocketBridgeIfNeeded(launchArgs);
358
359 std::string remoteProgram(launchArgs[0]);
360 debug1 << mName << "LaunchRPC command = " << remoteProgram.c_str() << ", args=(";
361
362 // Make a command line array for the exec functions.
363 char **args = new char *[launchArgs.size() + 1];
364 memset(args, 0, (launchArgs.size() + 1) * sizeof(char *));
365 for(size_t i = 0; i < launchArgs.size(); ++i)
366 {
367 args[i] = new char[launchArgs[i].size() + 1];
368 strcpy(args[i], launchArgs[i].c_str());
369 if(i > 0) {
370 debug1 << launchArgs[i].c_str() << " ";
371 }
372 }
373 debug1 << ")" << endl;
374
375 // We have command line arguments for a command to launch.
376
377 int remoteProgramPid = 0;
378#if defined(_WIN32)
379 // Do it the WIN32 way where we use the _spawnvp system call.
380 remoteProgramPid = _spawnvp(_P_NOWAIT, remoteProgram.c_str(), args);
381#else
382 // Watch for a process who died
383 childDied[remoteProgramPid] = false;
384 signal(SIGCHLD, DeadChildHandler);
385
386#ifdef CAPTURE_CHILD_OUTPUT
387 // Create a pipe.
388 int f_des[2];
389 if(pipe(f_des) == -1)
390 readOutput = false;
391#endif
392
393 switch (remoteProgramPid = fork())
394 {
395 case -1:
396 // Could not fork.
397 exit(-1); // HOOKS_IGNORE
398 break;
399 case 0:
400 // Close stdin and any other file descriptors.
401 fclose(stdin);
402#ifdef CAPTURE_CHILD_OUTPUT
403 // Send the process' stdout/stderr to our pipe.
404 if(readOutput)
405 {
406 dup2(f_des[1], fileno(stdout));
407 dup2(f_des[1], fileno(stderr));
408 close(f_des[0]);
409 close(f_des[1]);
410 }
411#endif
412 for (int k = 3 ; k < 32 ; ++k)
413 {
414 close(k);
415 }
416 // Execute the process on the local machine.
417 remoteProgram = FileFunctions::ExpandPath(remoteProgram);
418 execvp(remoteProgram.c_str(), args);
419 exit(-1); // HOOKS_IGNORE
420 break; // OCD
421 default:
422#ifdef CAPTURE_CHILD_OUTPUT
423 if(readOutput)
424 close(f_des[1]);
425#endif
426 break;
427 }
428
429 // Stop watching for dead children
430 signal(SIGCHLD, SIG_DFL);
431
432 // If we had a dead child, try and connect back to the client that
433 // wanted to connect to the dead child.
434 if(childDied[remoteProgramPid])
435 {
436 // Create a temp array of pointers to the strings that we
437 // created and pass the temp array to the TerminateConnectionRequest
438 // method because it creates a ParentProcess object that will
439 // rearrange the pointers in the array.
440 char **args2 = new char *[launchArgs.size() + 1];
441 for(size_t i = 0; i < launchArgs.size(); ++i)
442 args2[i] = args[i];
443
444 // Tell the client that we could not connect.
445 TerminateConnectionRequest(launchArgs.size(), args2);
446
447 delete [] args2;
448 }
449#ifdef CAPTURE_CHILD_OUTPUT
450 else if(readOutput)
451 {
452 // Add the child's output pipe to the list of descriptors that
453 // we will check. We add the pipe file descriptor as a
454 // SocketConnection object.
455 *conn = new SocketConnection(f_des[0]);
456 }
457#endif
458#endif
459
460 // Free the command line storage.
461 for(size_t i = 0; i < launchArgs.size(); ++i)
462 delete [] args[i];
463 delete [] args;
464 debug1 << mName << "end" << endl;
465}
466
467// ****************************************************************************
468// Method: LaunchService::TerminateConnectionRequest
469//
470// Purpose:
471// Tells the client that we could not launch the desired process. This
472// lets the client fail gracefully instead of hang.
473//
474// Arguments:
475// argc : The number of arguments in argv.
476// argv : The argument array used to connect back to the client.
477//
478// Programmer: Brad Whitlock
479// Creation: Mon May 5 11:46:35 PDT 2003
480//
481// Modifications:
482// Brad Whitlock, Tue Jul 29 11:39:03 PDT 2003
483// Changed interface to ParentProcess::Connect.
484//
485// Mark C. Miller, Wed Jun 17 14:27:08 PDT 2009
486// Replaced CATCHALL(...) with CATCHALL.
487// ****************************************************************************
488
489void
490LaunchService::TerminateConnectionRequest(int argc, char *argv[])
491{
492 // Try and connect back to the process that initiated the request and
493 // send it a non-zero fail code so it will terminate the connection.
494 TRY
495 {
496 debug1 << "Terminating connection request to the client." << endl;
497
498 ParentProcess killer;
499
500 // Connect back to the process and say that we could not connect.
501 killer.Connect(1, 1, &argc, &argv, true, 3);
502 }
503 CATCHALL
504 {
505 // We know that we're going to get here, but no action is required.
506 }
507 ENDTRY
508}