FORM  4.1
parallel.c
Go to the documentation of this file.
1 
11 /* #[ License : */
12 /*
13  * Copyright (C) 1984-2013 J.A.M. Vermaseren
14  * When using this file you are requested to refer to the publication
15  * J.A.M.Vermaseren "New features of FORM" math-ph/0010025
16  * This is considered a matter of courtesy as the development was paid
17  * for by FOM the Dutch physics granting agency and we would like to
18  * be able to track its scientific use to convince FOM of its value
19  * for the community.
20  *
21  * This file is part of FORM.
22  *
23  * FORM is free software: you can redistribute it and/or modify it under the
24  * terms of the GNU General Public License as published by the Free Software
25  * Foundation, either version 3 of the License, or (at your option) any later
26  * version.
27  *
28  * FORM is distributed in the hope that it will be useful, but WITHOUT ANY
29  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
31  * details.
32  *
33  * You should have received a copy of the GNU General Public License along
34  * with FORM. If not, see <http://www.gnu.org/licenses/>.
35  */
36 /* #] License : */
37 /*
38  #[ includes :
39 */
40 #include "form3.h"
41 #include "vector.h"
42 
43 /*
44 #define PF_DEBUG_BCAST_LONG
45 #define PF_DEBUG_BCAST_BUF
46 #define PF_DEBUG_BCAST_PREDOLLAR
47 #define PF_DEBUG_BCAST_RHSEXPR
48 #define PF_DEBUG_BCAST_DOLLAR
49 #define PF_DEBUG_BCAST_PREVAR
50 #define PF_DEBUG_BCAST_CBUF
51 #define PF_DEBUG_BCAST_EXPRFLAGS
52 #define PF_DEBUG_REDUCE_DOLLAR
53 */
54 
55 /* mpi.c */
/*
 * Prototypes of the low-level message passing layer (see the "mpi.c" note
 * above). As used in this file:
 *  - PF_IRecvRbuf(rbuf,i,src) is called to start receiving into buffer i of
 *    rbuf from process src, and PF_WaitRbuf(rbuf,i,&size) is called later to
 *    obtain the message tag and the received size.
 *  - PF_RecvWbuf(buf,&size,&src) returns the message tag and updates size.
 * NOTE(review): the exact semantics live in mpi.c, which is not visible here.
 */
56 LONG PF_RealTime(int);
57 int PF_LibInit(int*, char***);
58 int PF_LibTerminate(int);
59 int PF_Probe(int*);
60 int PF_RecvWbuf(WORD*,LONG*,int*);
61 int PF_IRecvRbuf(PF_BUFFER*,int,int);
62 int PF_WaitRbuf(PF_BUFFER *,int,LONG *);
63 int PF_RawSend(int dest, void *buf, LONG l, int tag);
64 LONG PF_RawRecv(int *src,void *buf,LONG thesize,int *tag);
65 int PF_RawProbe(int *src, int *tag, int *bytesize);
66 
67 /* Private functions */
68 
/*
 * Forward declarations of file-local helpers that are defined further down
 * in this file (outside the part shown here).
 */
69 static int PF_WaitAllSlaves(void);
70 
71 static void PF_PackRedefinedPreVars(void);
72 static void PF_UnpackRedefinedPreVars(void);
73 
74 static int PF_Wait4MasterIP(int tag);
75 static int PF_DoOneExpr(void);
76 static int PF_ReadMaster(void);/*reads directly to its scratch!*/
77 static int PF_Slave2MasterIP(int src);/*both master and slave*/
78 static int PF_Master2SlaveIP(int dest, EXPRESSIONS e);
79 static int PF_WalkThrough(WORD *t, LONG l, LONG chunk, LONG *count);
80 static int PF_SendChunkIP(FILEHANDLE *curfile, POSITION *position, int to, LONG thesize);
81 static int PF_RecvChunkIP(FILEHANDLE *curfile, int from, LONG thesize);
82 
/* Helpers dealing with error messages coming from other processes. */
83 static void PF_ReceiveErrorMessage(int src, int tag);
84 static void PF_CatchErrorMessages(int *src, int *tag);
85 static void PF_CatchErrorMessagesForAll(void);
86 static int PF_ProbeWithCatchingErrorMessages(int *src);
87 
88 /* Variables */
89 
/*
 * PF holds the global state of the parallel version; fields used in this file
 * include PF.me, PF.numtasks, PF.rbufs, PF.sbuf, PF.log and PF.parallel.
 */
90 PARALLELVARS PF;
91 #ifdef MPI2
92  WORD *PF_shared_buff;
93 #endif
94 
95 static LONG PF_goutterms; /* (master) Total out terms at PF_EndSort(), used in PF_Statistics(). */
96 static POSITION PF_exprsize; /* (master) The size of the expression at PF_EndSort(), used in PF_Processor(). */
97 
98 /*
99  This will work well only under Linux, see
100  #ifdef PF_WITH_SCHED_YIELD
101  below in PF_WaitAllSlaves().
102 */
103 #ifdef PF_WITH_SCHED_YIELD
104  #include <sched.h>
105 #endif
106 
/*
 * PRINTFBUF(TEXT,TERM,SIZE): debugging aid, active only when compiled with
 * PF_WITHLOG and when PF.log is set. It writes "[me|module] TEXT : " to
 * stderr and, if TERM is non-null, "[TERM[0]]" followed by TERM[1] up to
 * TERM[SIZE-1] (the latter only when 0 < SIZE < 500).
 * Without PF_WITHLOG the macro expands to an empty block.
 */
107 #ifdef PF_WITHLOG
108  #define PRINTFBUF(TEXT,TERM,SIZE) { UBYTE lbuf[24]; if(PF.log){ WORD iii;\
109  NumToStr(lbuf,AC.CModule); \
110  fprintf(stderr,"[%d|%s] %s : ",PF.me,lbuf,(char*)TEXT);\
111  if(TERM){ fprintf(stderr,"[%d] ",(int)(*TERM));\
112  if((SIZE)<500 && (SIZE)>0) for(iii=1;iii<(SIZE);iii++)\
113  fprintf(stderr,"%d ",TERM[iii]); }\
114  fprintf(stderr,"\n");\
115  fflush(stderr); } }
116 #else
117  #define PRINTFBUF(TEXT,TERM,SIZE) {}
118 #endif
119 
/*
 * SWAP(x, y): exchanges the contents of the two lvalues x and y bytewise.
 * The temporary array gets the size -1 when sizeof(x) != sizeof(y), which
 * turns a size mismatch into a compile-time error.
 * Note that the arguments are used unparenthesized (&x, &y) and are
 * evaluated more than once, so they should be plain variables.
 */
124 #define SWAP(x, y) \
125  do { \
126  char swap_tmp__[sizeof(x) == sizeof(y) ? (int)sizeof(x) : -1]; \
127  memcpy(swap_tmp__, &y, sizeof(x)); \
128  memcpy(&y, &x, sizeof(x)); \
129  memcpy(&x, swap_tmp__, sizeof(x)); \
130  } while (0)
131 
/*
 * PACK_LONG(p, n): stores the value n in two consecutive words at p, the
 * low BITSINWORD bits first and the next BITSINWORD bits second, and
 * advances p by two. Bits of n above 2*BITSINWORD are not stored.
 */
135 #define PACK_LONG(p, n) \
136  do { \
137  *(p)++ = (UWORD)((ULONG)(n) & (ULONG)WORDMASK); \
138  *(p)++ = (UWORD)(((ULONG)(n) >> BITSINWORD) & (ULONG)WORDMASK); \
139  } while (0)
140 
/*
 * UNPACK_LONG(p, n): inverse of PACK_LONG. Combines p[0] (low part) and
 * p[1] (high part) into n and advances p by two.
 */
144 #define UNPACK_LONG(p, n) \
145  do { \
146  (n) = (LONG)((((ULONG)(p)[1] & (ULONG)WORDMASK) << BITSINWORD) | ((ULONG)(p)[0] & (ULONG)WORDMASK)); \
147  (p) += 2; \
148  } while (0)
149 
/*
 * CHECK(condition): when the condition is false, reports
 * "Fatal error at <file>:<line>" via Error0() and calls Terminate(-1).
 * The intermediate macro _CHECK exists so that __FILE__ and __LINE__ are
 * expanded before __CHECK pastes/stringifies them into the message.
 */
153 #define CHECK(condition) _CHECK(condition, __FILE__, __LINE__)
154 #define _CHECK(condition, file, line) __CHECK(condition, file, line)
155 #define __CHECK(condition, file, line) \
156  do { \
157  if ( !(condition) ) { \
158  Error0("Fatal error at " file ":" #line); \
159  Terminate(-1); \
160  } \
161  } while (0)
162 
163 /*
164  * For debugging.
165  */
/*
 * DBGOUT(lv1, lv2, a): when lv1 >= lv2, prints with printf and flushes
 * stdout. The argument a must be a complete, parenthesized printf argument
 * list, e.g. DBGOUT(1, 2, ("x=%d\n", x)).
 */
166 #define DBGOUT(lv1, lv2, a) do { if ( lv1 >= lv2 ) { printf a; fflush(stdout); } } while (0)
167 
168 /* (AN.ninterms of master) == max(AN.ninterms of slaves) == sum(PF_linterms of slaves) at EndSort(). */
/* DBGOUT_NINTERMS is disabled here (expands to nothing); the alternative definition below enables it. */
169 #define DBGOUT_NINTERMS(lv, a)
170 /* #define DBGOUT_NINTERMS(lv, a) DBGOUT(1, lv, a) */
171 
172 /*
173  #] includes :
174  #[ statistics :
175  #[ variables : (should be part of a struct?)
176 */
177 static LONG PF_linterms; /* local count of input terms on this process */
/* Number of LONG values kept per process in PF_stats (see PF_Statistics). */
178 #define PF_STATS_SIZE 5
179 static LONG **PF_stats = NULL;/* space for collecting statistics of all procs; allocated lazily in PF_Statistics() */
180 static LONG PF_laststat; /* last realtime when statistics were printed */
181 static LONG PF_statsinterval;/* time interval for printing statistics */
182 /*
183  #] variables :
184  #[ PF_Statistics :
185 */
186 
195 static int PF_Statistics(LONG **stats, int proc)
196 {
197  GETIDENTITY
198  LONG real, cpu;
199  WORD rpart, cpart;
200  int i, j;
201 
202  if ( AT.SS == AM.S0 && PF.me == MASTER ) {
203  real = PF_RealTime(PF_TIME); rpart = (WORD)(real%100); real /= 100;
204 
205  if ( PF_stats == NULL ) {
206  PF_stats = (LONG**)Malloc1(PF.numtasks*sizeof(LONG*),"PF_stats 1");
207  for ( i = 0; i < PF.numtasks; i++ ) {
208  PF_stats[i] = (LONG*)Malloc1(PF_STATS_SIZE*sizeof(LONG),"PF_stats 2");
209  for ( j = 0; j < PF_STATS_SIZE; j++ ) PF_stats[i][j] = 0;
210  }
211  }
212  if ( proc > 0 ) for ( i = 0; i < PF_STATS_SIZE; i++ ) PF_stats[proc][i] = stats[0][i];
213 
214  if ( real >= PF_laststat + PF_statsinterval || proc == 0 ) {
215  LONG sum[PF_STATS_SIZE];
216 
217  for ( i = 0; i < PF_STATS_SIZE; i++ ) sum[i] = 0;
218  sum[0] = cpu = TimeCPU(1);
219  cpart = (WORD)(cpu%1000);
220  cpu /= 1000;
221  cpart /= 10;
222  if ( AC.OldParallelStats ) MesPrint("");
223  if ( proc > 0 && AC.StatsFlag && AC.OldParallelStats ) {
224  MesPrint("proc CPU in gen left byte");
225  MesPrint("%3d : %7l.%2i %10l",0,cpu,cpart,AN.ninterms);
226  }
227  else if ( AC.StatsFlag && AC.OldParallelStats ) {
228  MesPrint("proc CPU in gen out byte");
229  MesPrint("%3d : %7l.%2i %10l %10l %10l",0,cpu,cpart,AN.ninterms,0,PF_goutterms);
230  }
231 
232  for ( i = 1; i < PF.numtasks; i++ ) {
233  cpart = (WORD)(PF_stats[i][0]%1000);
234  cpu = PF_stats[i][0] / 1000;
235  cpart /= 10;
236  if ( AC.StatsFlag && AC.OldParallelStats )
237  MesPrint("%3d : %7l.%2i %10l %10l %10l",i,cpu,cpart,
238  PF_stats[i][2],PF_stats[i][3],PF_stats[i][4]);
239  for ( j = 0; j < PF_STATS_SIZE; j++ ) sum[j] += PF_stats[i][j];
240  }
241  cpart = (WORD)(sum[0]%1000);
242  cpu = sum[0] / 1000;
243  cpart /= 10;
244  if ( AC.StatsFlag && AC.OldParallelStats ) {
245  MesPrint("Sum = %7l.%2i %10l %10l %10l",cpu,cpart,sum[2],sum[3],sum[4]);
246  MesPrint("Real = %7l.%2i %20s (%l) %16s",
247  real,rpart,AC.Commercial,AC.CModule,EXPRNAME(AR.CurExpr));
248  MesPrint("");
249  }
250  PF_laststat = real;
251  }
252  }
253  return(0);
254 }
255 /*
256  #] PF_Statistics :
257  #] statistics :
258  #[ sort.c :
259  #[ sort variables :
260 */
261 
/*
 * A node of the tree of losers used by the master to merge the sorted term
 * streams of the slaves (see PF_InitTree and PF_GetLoser).
 */
265 typedef struct NoDe {
/* Child nodes; a null pointer means that this side is a leaf fed directly by a source. */
266  struct NoDe *left;
267  struct NoDe *rght;
/* Source index of the current candidate term of each side; 0 means no term (side exhausted or not yet filled). */
268  int lloser;
269  int rloser;
/* For leaf sides: the source (slave) feeding that side, or 0 if there is none. */
270  int lsrc;
271  int rsrc;
272 } NODE;
273 
274 /*
275  should/could be put in one struct
276 */
/*
 * PF_root is an array of nodes in heap order (allocated in PF_InitTree).
 * PF_loser == 0 tells PF_GetLoser that the tree still has to be filled.
 * PF_term, PF_newcpos and PF_newclen are indexed by the source number
 * (0 .. PF.numtasks-1); PF_newclen[i] != 0 signals that the coefficient of
 * PF_term[i] has been replaced by the one stored at PF_newcpos[i].
 */
277 static NODE *PF_root; /* root of tree of losers */
278 static WORD PF_loser; /* this is the last loser */
279 static WORD **PF_term; /* these point to the active terms */
280 static WORD **PF_newcpos; /* new coeffs of merged terms */
281 static WORD *PF_newclen; /* length of new coefficients */
282 
283 /*
284  preliminary: could also write somewhere else?
285 */
286 
287 static WORD *PF_WorkSpace; /* used in PF_EndSort() */
288 static UWORD *PF_ScratchSpace; /* used in PF_GetLoser() */
289 
290 /*
291  #] sort variables :
292  #[ PF_AllocBuf :
293 */
294 
311 static PF_BUFFER *PF_AllocBuf(int nbufs, LONG bsize, WORD free)
312 {
313  PF_BUFFER *buf;
314  UBYTE *p, *stop;
315  LONG allocsize;
316  int i;
317 
318  allocsize =
319  (LONG)(sizeof(PF_BUFFER) + 4*nbufs*sizeof(WORD*) + (nbufs-free)*bsize);
320 
321  allocsize +=
322  (LONG)( nbufs * ( 2 * sizeof(MPI_Status)
323  + sizeof(MPI_Request)
324  + sizeof(MPI_Datatype)
325  ) );
326  allocsize += (LONG)( nbufs * 3 * sizeof(int) );
327 
328  if ( ( buf = (PF_BUFFER*)Malloc1(allocsize,"PF_AllocBuf") ) == NULL ) return(NULL);
329 
330  p = ((UBYTE *)buf) + sizeof(PF_BUFFER);
331  stop = ((UBYTE *)buf) + allocsize;
332 
333  buf->numbufs = nbufs;
334  buf->active = 0;
335 
336  buf->buff = (WORD**)p; p += buf->numbufs*sizeof(WORD*);
337  buf->fill = (WORD**)p; p += buf->numbufs*sizeof(WORD*);
338  buf->full = (WORD**)p; p += buf->numbufs*sizeof(WORD*);
339  buf->stop = (WORD**)p; p += buf->numbufs*sizeof(WORD*);
340  buf->status = (MPI_Status *)p; p += buf->numbufs*sizeof(MPI_Status);
341  buf->retstat = (MPI_Status *)p; p += buf->numbufs*sizeof(MPI_Status);
342  buf->request = (MPI_Request *)p; p += buf->numbufs*sizeof(MPI_Request);
343  buf->type = (MPI_Datatype *)p; p += buf->numbufs*sizeof(MPI_Datatype);
344  buf->index = (int *)p; p += buf->numbufs*sizeof(int);
345 
346  for ( i = 0; i < buf->numbufs; i++ ) buf->request[i] = MPI_REQUEST_NULL;
347  buf->tag = (int *)p; p += buf->numbufs*sizeof(int);
348  buf->from = (int *)p; p += buf->numbufs*sizeof(int);
349 /*
350  and finally the real bufferspace
351 */
352  for ( i = free; i < buf->numbufs; i++ ) {
353  buf->buff[i] = (WORD*)p; p += bsize;
354  buf->stop[i] = (WORD*)p;
355  buf->fill[i] = buf->full[i] = buf->buff[i];
356  }
357  if ( p != stop ) {
358  MesPrint("Error in PF_AllocBuf p = %x stop = %x\n",p,stop);
359  return(NULL);
360  }
361  return(buf);
362 }
363 
364 /*
365  #] PF_AllocBuf :
366  #[ PF_InitTree :
367 */
368 
/*
 * PF_InitTree: prepares the merge of the term streams of the slaves
 * (called from PF_EndSort on the master).
 *
 * 1. On the first call (PF_term == NULL) one block is allocated that holds
 *    PF_term, PF_newcpos, PF_newclen, one coefficient area of AM.MaxTal
 *    words per task, PF_WorkSpace and PF_ScratchSpace.
 * 2. On the first call (PF.rbufs == NULL) the receive buffers are allocated.
 *    On every call buffer 0 of each rbuf is mapped onto a slice of
 *    AT.SS->lBuffer: one word for rbuf[0] (used for the zero term) and
 *    'size' words for each slave. Then a receive is started for every slave.
 * 3. The nodes of the tree of losers are allocated (once) and initialized.
 *
 * Returns the number of nodes in the tree, or -1 on error.
 * NOTE(review): needs PF.numtasks >= 2 (division by PF.numtasks-1 and use of
 * rbuf[1] below).
 */
380 static int PF_InitTree(void)
381 {
382  GETIDENTITY
383  PF_BUFFER **rbuf = PF.rbufs;
384  UBYTE *p, *stop;
385  int numrbufs,numtasks = PF.numtasks;
386  int i, j, src, numnodes;
387  int numslaves = numtasks - 1;
388  LONG size;
389 /*
390  #[ the buffers : for the new coefficients and the terms
391  we need one for each slave
392 */
393  if ( PF_term == NULL ) {
/* The size must match the sum of the pieces handed out below; this is verified with p != stop. */
394  size = 2*numtasks*sizeof(WORD*) + sizeof(WORD)*
395  ( numtasks*(1 + AM.MaxTal) + (AM.MaxTer/sizeof(WORD)+1) + 2*(AM.MaxTal+2));
396 
397  PF_term = (WORD **)Malloc1(size,"PF_term");
398  stop = ((UBYTE*)PF_term) + size;
399  p = ((UBYTE*)PF_term) + numtasks*sizeof(WORD*);
400 
401  PF_newcpos = (WORD **)p; p += sizeof(WORD*) * numtasks;
402  PF_newclen = (WORD *)p; p += sizeof(WORD) * numtasks;
403  for ( i = 0; i < numtasks; i++ ) {
404  PF_newcpos[i] = (WORD *)p; p += sizeof(WORD)*AM.MaxTal;
405  PF_newclen[i] = 0;
406  }
407  PF_WorkSpace = (WORD *)p; p += AM.MaxTer+sizeof(WORD);
408  PF_ScratchSpace = (UWORD*)p; p += 2*(AM.MaxTal+2)*sizeof(UWORD);
409 
410  if ( p != stop ) { MesPrint("error in PF_InitTree"); return(-1); }
411  }
412 /*
413  #] the buffers :
414  #[ the receive buffers :
415 */
416  numrbufs = PF.numrbufs;
417 /*
418  this is the size we have in the combined sortbufs for one slave
419 */
420  size = (AT.SS->sTop2 - AT.SS->lBuffer - 1)/(PF.numtasks - 1);
421 
422  if ( rbuf == NULL ) {
/* Buffer 0 of every rbuf is "free" (third argument 1): it is mapped onto AT.SS->lBuffer below. */
423  if ( ( rbuf = (PF_BUFFER**)Malloc1(numtasks*sizeof(PF_BUFFER*), "Master: rbufs") ) == NULL ) return(-1);
424  if ( (rbuf[0] = PF_AllocBuf(1,0,1) ) == NULL ) return(-1);
425  for ( i = 1; i < numtasks; i++ ) {
426  if (!(rbuf[i] = PF_AllocBuf(numrbufs,sizeof(WORD)*size,1))) return(-1);
427  }
428  }
/* rbuf[0] gets the first word of lBuffer; the slaves get consecutive slices of 'size' words after it. */
429  rbuf[0]->buff[0] = AT.SS->lBuffer;
430  rbuf[0]->full[0] = rbuf[0]->fill[0] = rbuf[0]->buff[0];
431  rbuf[0]->stop[0] = rbuf[1]->buff[0] = rbuf[0]->buff[0] + 1;
432  rbuf[1]->full[0] = rbuf[1]->fill[0] = rbuf[1]->buff[0];
433  for ( i = 2; i < numtasks; i++ ) {
434  rbuf[i-1]->stop[0] = rbuf[i]->buff[0] = rbuf[i-1]->buff[0] + size;
435  rbuf[i]->full[0] = rbuf[i]->fill[0] = rbuf[i]->buff[0];
436  }
437  rbuf[numtasks-1]->stop[0] = rbuf[numtasks-1]->buff[0] + size;
438 
439  for ( i = 1; i < numtasks; i++ ) {
/*
 The first AM.MaxTer/sizeof(WORD)+2 words of each buffer are kept free;
 PF_PutIn copies terms that cross a buffer boundary into this area.
*/
440  for ( j = 0; j < rbuf[i]->numbufs; j++ ) {
441  rbuf[i]->full[j] = rbuf[i]->fill[j] = rbuf[i]->buff[j] + AM.MaxTer/sizeof(WORD) + 2;
442  }
443  PF_term[i] = rbuf[i]->fill[rbuf[i]->active];
444  *PF_term[i] = 0;
445  PF_IRecvRbuf(rbuf[i],rbuf[i]->active,i);
446  }
447  rbuf[0]->active = 0;
448  PF_term[0] = rbuf[0]->buff[0];
449  PF_term[0][0] = 0; /* PF_term[0] is used for a zero term. */
450  PF.rbufs = rbuf;
451 /*
452  #] the receive buffers :
453  #[ the actual tree :
454 
455  calculate number of nodes in mergetree and allocate space for them
456 */
/* One node for up to two slaves; otherwise (smallest power of two >= numslaves) - 1 nodes. */
457  if ( numslaves < 3 ) numnodes = 1;
458  else {
459  numnodes = 2;
460  while ( numnodes < numslaves ) numnodes *= 2;
461  numnodes -= 1;
462  }
463 
464  if ( PF_root == NULL )
465  if ( ( PF_root = (NODE*)Malloc1(sizeof(NODE)*numnodes,"nodes in mergtree") ) == NULL )
466  return(-1);
467 /*
468  then initialize all the nodes
469 */
/*
 The nodes are stored in heap order: the children of node i are the nodes
 2*(i+1)-1 and 2*(i+1). Sides without a child node are leaves; they get the
 sources 1,2,... in order, first all left sides and then all right sides.
 Leaves for which no source remains get source 0.
*/
470  src = 1;
471  for ( i = 0; i < numnodes; i++ ) {
472  if ( 2*(i+1) <= numnodes ) {
473  PF_root[i].left = &(PF_root[2*(i+1)-1]);
474  PF_root[i].lsrc = 0;
475  }
476  else {
477  PF_root[i].left = 0;
478  if ( src < numtasks ) PF_root[i].lsrc = src++;
479  else PF_root[i].lsrc = 0;
480  }
481  PF_root[i].lloser = 0;
482  }
483  for ( i = 0; i < numnodes; i++ ) {
484  if ( 2*(i+1)+1 <= numnodes ) {
485  PF_root[i].rght = &(PF_root[2*(i+1)]);
486  PF_root[i].rsrc = 0;
487  }
488  else {
489  PF_root[i].rght = 0;
490  if (src<numtasks) PF_root[i].rsrc = src++;
491  else PF_root[i].rsrc = 0;
492  }
493  PF_root[i].rloser = 0;
494  }
495 /*
496  #] the actual tree :
497 */
498  return(numnodes);
499 }
500 
501 /*
502  #] PF_InitTree :
503  #[ PF_PutIn :
504 */
505 
/*
 * PF_PutIn: (master) delivers the next term of the stream coming from the
 * process src.
 *
 * The terms are taken from the active receive buffer of PF.rbufs[src]. When
 * the next term is not completely inside that buffer, the part already
 * present (and, when needed for decompression, the previous term) is copied
 * in front of the data area of the next buffer, the next buffer is waited
 * for and made active. A term with a negative first word is compressed
 * relative to the previous term and is expanded in place.
 *
 * Returns a pointer to the term, or PF_term[0] (the zero term) when
 * src <= 0 or when the stream of src has ended.
 */
524 static WORD *PF_PutIn(int src)
525 {
526  int tag;
527  WORD im, r;
528  WORD *m1, *m2;
529  LONG size;
530  PF_BUFFER *rbuf = PF.rbufs[src];
531  int a = rbuf->active;
532  int next = a+1 >= rbuf->numbufs ? 0 : a+1 ;
533  WORD *lastterm = PF_term[src];
534  WORD *term = rbuf->fill[a];
535 
536  if ( src <= 0 ) return(PF_term[0]);
537 
/* full[a] still at its initial position (see PF_InitTree): nothing has been received yet. */
538  if ( rbuf->full[a] == rbuf->buff[a] + AM.MaxTer/sizeof(WORD) + 2 ) {
539 /*
540  very first term from this src
541 */
542  tag = PF_WaitRbuf(rbuf,a,&size);
543  rbuf->full[a] += size;
/* The end-of-buffer tag marks the last message: terminate the data with a zero word. */
544  if ( tag == PF_ENDBUFFER_MSGTAG ) *rbuf->full[a]++ = 0;
545  else if ( rbuf->numbufs > 1 ) {
546 /*
547  post a nonblock. recv. for the next buffer
548 */
549  rbuf->full[next] = rbuf->buff[next] + AM.MaxTer/sizeof(WORD) + 2;
550  size = (LONG)(rbuf->stop[next] - rbuf->full[next]);
551  PF_IRecvRbuf(rbuf,next,src);
552  }
553  }
/* A zero word inside the received data is the end marker of the stream. */
554  if ( *term == 0 && term != rbuf->full[a] ) return(PF_term[0]);
555 /*
556  exception is for rare cases when the terms fitted exactly into buffer
557 */
558  if ( term + *term > rbuf->full[a] || term + 1 >= rbuf->full[a] ) {
559 newterms:
/* m1: the last word in front of the data area of the next buffer; the copies below end there. */
560  m1 = rbuf->buff[next] + AM.MaxTer/sizeof(WORD) + 1;
561  if ( *term < 0 || term == rbuf->full[a] ) {
562 /*
563  copy term and lastterm to the new buffer, so that they end at m1
564 */
565  m2 = rbuf->full[a] - 1;
566  while ( m2 >= term ) *m1-- = *m2--;
567  rbuf->fill[next] = term = m1 + 1;
568  m2 = lastterm + *lastterm - 1;
569  while ( m2 >= lastterm ) *m1-- = *m2--;
570  lastterm = m1 + 1;
571  }
572  else {
573 /*
574  copy beginning of term to the next buffer so that it ends at m1
575 */
576  m2 = rbuf->full[a] - 1;
577  while ( m2 >= term ) *m1-- = *m2--;
578  rbuf->fill[next] = term = m1 + 1;
579  }
/* With a single buffer next == a, so the receive for it has to be started here. */
580  if ( rbuf->numbufs == 1 ) {
581  rbuf->full[a] = rbuf->buff[a] + AM.MaxTer/sizeof(WORD) + 2;
582  size = (LONG)(rbuf->stop[a] - rbuf->full[a]);
583  PF_IRecvRbuf(rbuf,a,src);
584  }
585 /*
586  wait for new terms in the next buffer
587 */
588  rbuf->full[next] = rbuf->buff[next] + AM.MaxTer/sizeof(WORD) + 2;
589  tag = PF_WaitRbuf(rbuf,next,&size);
590  rbuf->full[next] += size;
591  if ( tag == PF_ENDBUFFER_MSGTAG ) {
592  *rbuf->full[next]++ = 0;
593  }
594  else if ( rbuf->numbufs > 1 ) {
595 /*
596  post a nonblock. recv. for active buffer, it is not needed anymore
597 */
598  rbuf->full[a] = rbuf->buff[a] + AM.MaxTer/sizeof(WORD) + 2;
599  size = (LONG)(rbuf->stop[a] - rbuf->full[a]);
600  PF_IRecvRbuf(rbuf,a,src);
601  }
602 /*
603  now safely make next buffer active
604 */
605  a = rbuf->active = next;
606  }
607 
608  if ( *term < 0 ) {
609 /*
610  We need to decompress the term
611 */
/*
 -(*term) words are taken over from the previous term (the words following
 its length word) and are put in front of the remainder at term+2; r
 becomes the length of the expanded term.
*/
612  im = *term;
613  r = term[1] - im + 1;
614  m1 = term + 2;
615  m2 = lastterm - im + 1;
616  while ( ++im <= 0 ) *--m1 = *--m2;
617  *--m1 = r;
618  rbuf->fill[a] = term = m1;
/* The expanded length is known only now; the term may still be incomplete in this buffer. */
619  if ( term + *term > rbuf->full[a] ) goto newterms;
620  }
621  rbuf->fill[a] += *term;
622  return(term);
623 }
624 
625 /*
626  #] PF_PutIn :
627  #[ PF_GetLoser :
628 */
629 
/*
 * PF_GetLoser: (master) determines, for the subtree below node n, the source
 * whose current term comes next in the merge.
 *
 * On entry PF_loser is either 0 (fill the whole subtree, used for the
 * initialization) or the source whose term has just been consumed; only the
 * side of the node that held this source is refreshed, either recursively
 * or, for a leaf side, with PF_PutIn.
 *
 * When the terms of both sides compare equal they are combined:
 *  - with S->PolyWise set, the PolyFun/PolyRatFun arguments are added and
 *    the result is put into the left term;
 *  - otherwise the coefficients are added with AddRat and the sum is stored
 *    in PF_newcpos/PF_newclen of the left source.
 * After that the right side is refreshed. If the sum vanishes both sides are
 * refreshed.
 *
 * Returns the source index of the term to be taken next, 0 when the subtree
 * has no terms left, or -1 when AddRat reports an error.
 */
648 static int PF_GetLoser(NODE *n)
649 {
650  GETIDENTITY
651  WORD comp;
652 
653  if ( PF_loser == 0 ) {
654 /*
655  this is for the right initialization of the tree only
656 */
657  if ( n->left ) n->lloser = PF_GetLoser(n->left);
658  else {
659  n->lloser = n->lsrc;
660  if ( *(PF_term[n->lsrc] = PF_PutIn(n->lsrc)) == 0) n->lloser = 0;
661  }
/* The recursive call may have changed PF_loser; restore the "initializing" state for the right side. */
662  PF_loser = 0;
663  if ( n->rght ) n->rloser = PF_GetLoser(n->rght);
664  else{
665  n->rloser = n->rsrc;
666  if ( *(PF_term[n->rsrc] = PF_PutIn(n->rsrc)) == 0 ) n->rloser = 0;
667  }
668  PF_loser = 0;
669  }
670  else if ( PF_loser == n->lloser ) {
/* The consumed term came from the left side: get a new candidate for it. */
671  if ( n->left ) n->lloser = PF_GetLoser(n->left);
672  else {
673  n->lloser = n->lsrc;
674  if ( *(PF_term[n->lsrc] = PF_PutIn(n->lsrc)) == 0 ) n->lloser = 0;
675  }
676  }
677  else if ( PF_loser == n->rloser ) {
/* Also the re-entry point after two equal terms have been combined or have cancelled. */
678 newright:
679  if ( n->rght ) n->rloser = PF_GetLoser(n->rght);
680  else {
681  n->rloser = n->rsrc;
682  if ( *(PF_term[n->rsrc] = PF_PutIn(n->rsrc)) == 0 ) n->rloser = 0;
683  }
684  }
685  if ( n->lloser > 0 && n->rloser > 0 ) {
686  comp = CompareTerms(BHEAD PF_term[n->lloser],PF_term[n->rloser],(WORD)0);
687  if ( comp > 0 ) return(n->lloser);
688  else if (comp < 0 ) return(n->rloser);
689  else {
690 /*
691  #[ terms are equal :
692 */
693  WORD *lcpos, *rcpos;
694  UWORD *newcpos;
695  WORD lclen, rclen, newclen, newnlen;
696  SORTING *S = AT.SS;
697 
698  if ( S->PolyWise ) {
699 /*
700  #[ Here we work with PolyFun :
701 */
702  WORD *tt1, *w;
703  WORD r1,r2;
704  WORD *ml = PF_term[n->lloser];
705  WORD *mr = PF_term[n->rloser];
706 
707  if ( ( r1 = (int)*PF_term[n->lloser] ) <= 0 ) r1 = 20;
708  if ( ( r2 = (int)*PF_term[n->rloser] ) <= 0 ) r2 = 20;
/* ml and mr are moved S->PolyWise words into the terms, to the functions that are to be added. */
709  tt1 = ml;
710  ml += S->PolyWise;
711  mr += S->PolyWise;
712  if ( S->PolyFlag == 2 ) {
713  w = poly_ratfun_add(BHEAD ml,mr);
714  if ( *tt1 + w[1] - ml[1] > AM.MaxTer/((LONG)sizeof(WORD)) ) {
715  MesPrint("Term too complex in PolyRatFun addition. MaxTermSize of %10l is too small",AM.MaxTer);
716  Terminate(-1);
717  }
718  AT.WorkPointer = w;
719  }
720  else {
721  w = AT.WorkPointer;
722  if ( w + ml[1] + mr[1] > AT.WorkTop ) {
723  MesPrint("A WorkSpace of %10l is too small",AM.WorkSize);
724  Terminate(-1);
725  }
726  AddArgs(BHEAD ml,mr,w);
727  }
/* w[1] is the size of the sum; an empty function or a single argument -SNUMBER,0 means that the terms cancel. */
728  r1 = w[1];
729  if ( r1 <= FUNHEAD || ( w[FUNHEAD] == -SNUMBER && w[FUNHEAD+1] == 0 ) ) {
730  goto cancelled;
731  }
/* Put the sum into the left term: same size, smaller (the term start moves up) or larger (the term start moves down). */
732  if ( r1 == ml[1] ) {
733  NCOPY(ml,w,r1);
734  }
735  else if ( r1 < ml[1] ) {
736  r2 = ml[1] - r1;
737  mr = w + r1;
738  ml += ml[1];
739  while ( --r1 >= 0 ) *--ml = *--mr;
740  mr = ml - r2;
741  r1 = S->PolyWise;
742  while ( --r1 >= 0 ) *--ml = *--mr;
743  *ml -= r2;
744  PF_term[n->lloser] = ml;
745  }
746  else {
747  r2 = r1 - ml[1];
748  if ( r2 > 2*AM.MaxTal )
749  MesPrint("warning: new term in polyfun is large");
750  mr = tt1 - r2;
751  r1 = S->PolyWise;
752  ml = tt1;
753  *ml += r2;
754  PF_term[n->lloser] = mr;
755  NCOPY(mr,ml,r1);
756  r1 = w[1];
757  NCOPY(mr,w,r1);
758  }
759  PF_newclen[n->rloser] = 0;
760  PF_loser = n->rloser;
761  goto newright;
762 /*
763  #] Here we work with PolyFun :
764 */
765  }
/*
 Locate the coefficient of each term: either the one stored earlier in
 PF_newcpos or the one at the end of the term itself (its last word holds
 the signed size of the coefficient).
*/
766  if ( ( lclen = PF_newclen[n->lloser] ) != 0 ) lcpos = PF_newcpos[n->lloser];
767  else {
768  lcpos = PF_term[n->lloser];
769  lclen = *(lcpos += *lcpos - 1);
770  lcpos -= ABS(lclen) - 1;
771  }
772  if ( ( rclen = PF_newclen[n->rloser] ) != 0 ) rcpos = PF_newcpos[n->rloser];
773  else {
774  rcpos = PF_term[n->rloser];
775  rclen = *(rcpos += *rcpos - 1);
776  rcpos -= ABS(rclen) -1;
777  }
/* Convert the signed sizes to the signed lengths that are passed to AddRat. */
778  lclen = ( (lclen > 0) ? (lclen-1) : (lclen+1) ) >> 1;
779  rclen = ( (rclen > 0) ? (rclen-1) : (rclen+1) ) >> 1;
780  newcpos = PF_ScratchSpace;
781  if ( AddRat(BHEAD (UWORD *)lcpos,lclen,(UWORD *)rcpos,rclen,newcpos,&newnlen) ) return(-1);
/* Modulus calculus is active: bring the sum back into range. */
782  if ( AN.ncmod != 0 ) {
783  if ( ( AC.modmode & POSNEG ) != 0 ) {
784  NormalModulus(newcpos,&newnlen);
785  }
786  if ( BigLong(newcpos,newnlen,(UWORD *)AC.cmod,ABS(AN.ncmod)) >=0 ) {
787  WORD ii;
788  SubPLon(newcpos,newnlen,(UWORD *)AC.cmod,ABS(AN.ncmod),newcpos,&newnlen);
789  newcpos[newnlen] = 1;
790  for ( ii = 1; ii < newnlen; ii++ ) newcpos[newnlen+ii] = 0;
791  }
792  }
793  if ( newnlen == 0 ) {
794 /*
795  terms cancel, get loser of left subtree and then of right subtree
796 */
797 cancelled:
798  PF_loser = n->lloser;
799  PF_newclen[n->lloser] = 0;
800  if ( n->left ) n->lloser = PF_GetLoser(n->left);
801  else {
802  n->lloser = n->lsrc;
803  if ( *(PF_term[n->lsrc] = PF_PutIn(n->lsrc)) == 0 ) n->lloser = 0;
804  }
805  PF_loser = n->rloser;
806  PF_newclen[n->rloser] = 0;
807  goto newright;
808  }
809  else {
810 /*
811  keep the left term and get the loser of right subtree
812 */
/* Store the sum as the new coefficient of the left source; PF_newclen keeps its signed size. */
813  newnlen <<= 1;
814  newclen = ( newnlen > 0 ) ? ( newnlen + 1 ) : ( newnlen - 1 );
815  if ( newnlen < 0 ) newnlen = -newnlen;
816  PF_newclen[n->lloser] = newclen;
817  lcpos = PF_newcpos[n->lloser];
818  if ( newclen < 0 ) newclen = -newclen;
819  while ( newclen-- ) *lcpos++ = *newcpos++;
820  PF_loser = n->rloser;
821  PF_newclen[n->rloser] = 0;
822  goto newright;
823  }
824 /*
825  #] terms are equal :
826 */
827  }
828  }
/* At most one side still has a term: return it, or 0 when both sides are exhausted. */
829  if (n->lloser > 0) return(n->lloser);
830  if (n->rloser > 0) return(n->rloser);
831  return(0);
832 }
833 /*
834  #] PF_GetLoser :
835  #[ PF_EndSort :
836 */
837 
864 int PF_EndSort(void)
865 {
866  GETIDENTITY
867  FILEHANDLE *fout = AR.outfile;
868  PF_BUFFER *sbuf=PF.sbuf;
869  SORTING *S = AT.SS;
870  WORD *outterm,*pp;
871  LONG size, noutterms;
872  POSITION position, oldposition;
873  WORD i,cc;
874  int oldgzipCompress;
875 
876  if ( AT.SS != AT.S0 || !PF.parallel ) return 0;
877 
878  if ( PF.me != MASTER ) {
879 /*
880  #[ the slaves have to initialize their sendbuffer :
881 
882  this is a slave and it's PObuffer should be the minimum of the
883  sortiosize on the master and the POsize of our file.
884  First save the original PObuffer and POstop of the outfile
885 */
886  size = (S->sTop2 - S->lBuffer - 1)/(PF.numtasks - 1);
887  size -= (AM.MaxTer/sizeof(WORD) + 2);
888  if ( fout->POsize < (LONG)(size*sizeof(WORD)) ) size = fout->POsize/sizeof(WORD);
889  if ( sbuf == NULL ) {
890  if ( (sbuf = PF_AllocBuf(PF.numsbufs, size*sizeof(WORD), 1)) == NULL ) return -1;
891  sbuf->active = 0;
892  PF.sbuf = sbuf;
893  }
894  sbuf->buff[0] = fout->PObuffer;
895  sbuf->stop[0] = fout->PObuffer+size;
896  if ( sbuf->stop[0] > fout->POstop ) return -1;
897  for ( i = 0; i < PF.numsbufs; i++ )
898  sbuf->fill[i] = sbuf->full[i] = sbuf->buff[i];
899 
900  fout->PObuffer = sbuf->buff[sbuf->active];
901  fout->POstop = sbuf->stop[sbuf->active];
902  fout->POsize = size*sizeof(WORD);
903  fout->POfill = fout->POfull = fout->PObuffer;
904 /*
905  #] the slaves have to initialize their sendbuffer :
906 */
907  return(0);
908  }
909 /*
910  this waits for all slaves to be ready to send terms back
911 */
912  PF_WaitAllSlaves(); /* Note, the returned value should be 0 on success. */
913 /*
914  Now collect the terms of all slaves and merge them.
915  PF_GetLoser gives the position of the smallest term, which is the real
916  work. The smallest term needs to be copied to the outbuf: use PutOut.
917 */
918  PF_InitTree();
919  S->PolyFlag = AR.PolyFun ? AR.PolyFunType : 0;
920  *AR.CompressPointer = 0;
921  SeekScratch(fout, &position);
922  oldposition = position;
923  oldgzipCompress = AR.gzipCompress;
924  AR.gzipCompress = 0;
925 
926  noutterms = 0;
927 
928  while ( PF_loser >= 0 ) {
929  if ( (PF_loser = PF_GetLoser(PF_root)) == 0 ) break;
930  outterm = PF_term[PF_loser];
931  noutterms++;
932 
933  if ( PF_newclen[PF_loser] != 0 ) {
934 /*
935  #[ this is only when new coeff was too long :
936 */
937  outterm = PF_WorkSpace;
938  pp = PF_term[PF_loser];
939  cc = *pp;
940  while ( cc-- ) *outterm++ = *pp++;
941  outterm = (outterm[-1] > 0) ? outterm-outterm[-1] : outterm+outterm[-1];
942  if ( PF_newclen[PF_loser] > 0 ) cc = (WORD)PF_newclen[PF_loser] - 1;
943  else cc = -(WORD)PF_newclen[PF_loser] - 1;
944  pp = PF_newcpos[PF_loser];
945  while ( cc-- ) *outterm++ = *pp++;
946  *outterm++ = PF_newclen[PF_loser];
947  *PF_WorkSpace = outterm - PF_WorkSpace;
948  outterm = PF_WorkSpace;
949  *PF_newcpos[PF_loser] = 0;
950  PF_newclen[PF_loser] = 0;
951 /*
952  #] this is only when new coeff was too long :
953 */
954  }
955  PRINTFBUF("PF_EndSort to PutOut: ",outterm,*outterm);
956  PutOut(BHEAD outterm,&position,fout,1);
957  }
958  if ( FlushOut(&position,fout,0) ) {
959  AR.gzipCompress = oldgzipCompress;
960  return(-1);
961  }
962  S->TermsLeft = PF_goutterms = noutterms;
963  DIFPOS(PF_exprsize, position, oldposition);
964  AR.gzipCompress = oldgzipCompress;
965  return(1);
966 }
967 
968 /*
969  #] PF_EndSort :
970  #] sort.c :
971  #[ proces.c :
972  #[ variables :
973 */
974 
/*
 * (slave) Copy of the last term handed out by PF_GetTerm while AR.DeferFlag
 * was set. The buffer of AM.MaxTer bytes is allocated in PF_GetTerm on first
 * use; a zero first word means that there is no current bracket.
 */
975 static WORD *PF_CurrentBracket;
976 
977 /*
978  #] variables :
979  #[ PF_GetTerm :
980 */
981 
/*
 * PF_GetTerm: (slave) puts the next term to be processed at 'term'.
 *
 * The terms are read from the buffer of the file handle fi (PF.slavebuf when
 * AC.RhsExprInModuleFlag and PF.rhsInParallel are both set, AR.infile
 * otherwise). When the buffer is used up, a message with the tag
 * PF_READY_MSGTAG carrying some statistics is sent to the master and a new
 * buffer is received. The first two words of a received buffer hold a term
 * counter from which AN.ninterms is set.
 *
 * With AR.DeferFlag set, terms whose part outside the bracket equals that of
 * PF_CurrentBracket are skipped (counted in AN.deferskipped). Of the first
 * term of a new bracket only the part outside the bracket, with coefficient
 * 1, is returned in 'term', while the complete term is kept in
 * PF_CurrentBracket.
 *
 * Returns *term, the length of the term, which is 0 when there are no more
 * terms.
 */
1000 static WORD PF_GetTerm(WORD *term)
1001 {
1002  GETIDENTITY
1003  FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile;
1004  WORD i;
1005  WORD *next, *np, *last, *lp = 0, *nextstop, *tp=term;
1006 
1007  /* Only on the slaves. */
1008 
1009  AN.deferskipped = 0;
/* The buffer is empty or has been read completely: ask the master for more. */
1010  if ( fi->POfill >= fi->POfull || fi->POfull == fi->PObuffer ) {
1011 ReceiveNew:
1012  {
1013 /*
1014  #[ receive new terms from master :
1015 */
1016  int src = MASTER, tag;
1017  int follow = 0;
1018  LONG size,cpu,space = 0;
1019 
1020  if ( PF.log ) {
1021  fprintf(stderr,"[%d] Starting to send to Master\n",PF.me);
1022  fflush(stderr);
1023  }
1024 
/* The "ready" message: CPU time, space (always 0 here), PF_linterms, GenTerms, TermsLeft and follow (0). */
1025  cpu = TimeCPU(1);
1026  PF_PreparePack();
1027  PF_Pack(&cpu ,1,PF_LONG);
1028  PF_Pack(&space ,1,PF_LONG);
1029  PF_Pack(&PF_linterms ,1,PF_LONG);
1030  PF_Pack(&(AM.S0->GenTerms) ,1,PF_LONG);
1031  PF_Pack(&(AM.S0->TermsLeft),1,PF_LONG);
1032  PF_Pack(&follow ,1,PF_INT );
1033 
1034  if ( PF.log ) {
1035  fprintf(stderr,"[%d] Now sending with tag = %d\n",PF.me,PF_READY_MSGTAG);
1036  fflush(stderr);
1037  }
1038 
1039  PF_Send(MASTER, PF_READY_MSGTAG);
1040 
1041  if ( PF.log ) {
1042  fprintf(stderr,"[%d] returning from send\n",PF.me);
1043  fflush(stderr);
1044  }
1045 
/* One word is kept in reserve: a terminating zero may be appended below. */
1046  size = fi->POstop - fi->PObuffer - 1;
1047 #ifdef AbsolutelyExtra
1048  PF_Receive(MASTER,PF_ANY_MSGTAG,&src,&tag);
1049 #ifdef MPI2
1050  if ( tag == PF_TERM_MSGTAG ) {
1051  PF_Unpack(&size, 1, PF_LONG);
1052  if ( PF_Put_target(src) == 0 ) {
1053  printf("PF_Put_target error ...\n");
1054  }
1055  }
1056  else {
1057  PF_RecvWbuf(fi->PObuffer,&size,&src);
1058  }
1059 #else
1060  PF_RecvWbuf(fi->PObuffer,&size,&src);
1061 #endif
1062 #endif
1063  tag=PF_RecvWbuf(fi->PObuffer,&size,&src);
1064 
1065  fi->POfill = fi->PObuffer;
1066  /* Get AN.ninterms which sits in the first 2 WORDs. */
1067  {
1068  LONG ninterms;
1069  UNPACK_LONG(fi->POfill, ninterms);
/* AN.ninterms is only updated when at least one term follows the counter. */
1070  if ( *fi->POfill ) {
1071  DBGOUT_NINTERMS(2, ("PF.me=%d AN.ninterms=%d PF_linterms=%d ninterms=%d GET\n", (int)PF.me, (int)AN.ninterms, (int)PF_linterms, (int)ninterms));
1072  AN.ninterms = ninterms - 1;
1073  } else {
1074  DBGOUT_NINTERMS(2, ("PF.me=%d AN.ninterms=%d PF_linterms=%d ninterms=%d GETEND\n", (int)PF.me, (int)AN.ninterms, (int)PF_linterms, (int)ninterms));
1075  }
1076  }
1077  fi->POfull = fi->PObuffer + size;
1078  if ( tag == PF_ENDSORT_MSGTAG ) *fi->POfull++ = 0;
1079 /*
1080  #] receive new terms from master :
1081 */
1082  }
/* A fresh buffer: forget the current bracket. */
1083  if ( PF_CurrentBracket ) *PF_CurrentBracket = 0;
1084  }
/* A zero word marks the end of the terms: reset the buffer and return the zero term. */
1085  if ( *fi->POfill == 0 ) {
1086  fi->POfill = fi->POfull = fi->PObuffer;
1087  *term = 0;
1088  goto RegRet;
1089  }
1090  if ( AR.DeferFlag ) {
1091  if ( !PF_CurrentBracket ) {
1092 /*
1093  #[ alloc space :
1094 */
1095  PF_CurrentBracket =
1096  (WORD*)Malloc1(AM.MaxTer,"PF_CurrentBracket");
1097  *PF_CurrentBracket = 0;
1098 /*
1099  #] alloc space :
1100 */
1101  }
1102  while ( *PF_CurrentBracket ) { /* "for each term in the buffer" */
1103 /*
1104  #[ test : bracket & skip if it's equal to the last in PF_CurrentBracket
1105 */
1106  next = fi->POfill;
/* nextstop: the start of the coefficient of the term, i.e. the end of its subterms. */
1107  nextstop = next + *next; nextstop -= ABS(nextstop[-1]);
1108  next++;
1109  last = PF_CurrentBracket+1;
1110  while ( next < nextstop ) {
1111 /*
1112  scan the next term and PF_CurrentBracket
1113 */
1114  if ( *last == HAAKJE && *next == HAAKJE ) {
1115 /*
1116  the part outside brackets is equal => skip this term
1117 */
1118  PRINTFBUF("PF_GetTerm skips",fi->POfill,*fi->POfill);
1119  break;
1120  }
1121 /*
1122  check if the current subterms are equal
1123 */
1124  np = next; next += next[1];
1125  lp = last; last += last[1];
1126  while ( np < next ) if ( *lp++ != *np++ ) goto strip;
1127  }
1128 /*
1129  go on to next term
1130 */
1131  fi->POfill += *fi->POfill;
1132  AN.deferskipped++;
1133 /*
1134  the usual checks
1135 */
1136  if ( fi->POfill >= fi->POfull || fi->POfull == fi->PObuffer )
1137  goto ReceiveNew;
1138  if ( *fi->POfill == 0 ) {
1139  fi->POfill = fi->POfull = fi->PObuffer;
1140  *term = 0;
1141  goto RegRet;
1142  }
1143 /*
1144  #] test :
1145 */
1146  }
1147 /*
1148  #[ copy :
1149 
1150  this term to CurrentBracket and the part outside of bracket
1151  to WorkSpace at term
1152 */
1153 strip:
1154  next = fi->POfill;
1155  nextstop = next + *next; nextstop -= ABS(nextstop[-1]);
1156  next++;
1157  tp++;
1158  lp = PF_CurrentBracket + 1;
1159  while ( next < nextstop ) {
1160  if ( *next == HAAKJE ) {
/*
 Found the bracket subterm: the rest of the term goes to PF_CurrentBracket
 only, and the term is consumed from the buffer. The output term is
 completed with the coefficient 1 (the words 1,1,3).
*/
1161  fi->POfill += *fi->POfill;
1162  while ( next < fi->POfill ) *lp++ = *next++;
1163  *PF_CurrentBracket = lp - PF_CurrentBracket;
1164  *lp = 0;
1165  *tp++ = 1;
1166  *tp++ = 1;
1167  *tp++ = 3;
1168  *term = WORDDIF(tp,term);
1169  PRINTFBUF("PF_GetTerm new brack",PF_CurrentBracket,*PF_CurrentBracket);
1170  PRINTFBUF("PF_GetTerm POfill",fi->POfill,*fi->POfill);
1171  goto RegRet;
1172  }
1173  np = next; next += next[1];
1174  while ( np < next ) *tp++ = *lp++ = *np++;
1175  }
/* No bracket subterm in this term: start over and copy the complete term below. */
1176  tp = term;
1177 /*
1178  #] copy :
1179 */
1180  }
1181 
/* The regular case: copy the complete term and advance in the buffer. */
1182  i = *fi->POfill;
1183  while ( i-- ) *tp++ = *fi->POfill++;
1184 RegRet:
1185  PRINTFBUF("PF_GetTerm returns",term,*term);
1186  return(*term);
1187 }
1188 
1189 /*
1190  #] PF_GetTerm :
1191  #[ PF_Deferred :
1192 */
1193 
/*
 * PF_Deferred: processes the terms of the current deferred bracket.
 *
 * term   the term handed in by the caller; it is passed either directly to
 *        Generator() (no bracket content) or to InsertTerm() as the term
 *        into which the bracket content is inserted.
 * level  passed unchanged to Generator().
 *
 * The input buffer is PF.slavebuf when both AC.RhsExprInModuleFlag and
 * PF.rhsInParallel are set, AR.infile otherwise. AM.MaxTer bytes of the
 * workspace are set aside for the composed term (termout).
 * AR.DeferFlag is cleared while working and set back to 1 before a
 * successful return.
 *
 * Returns 0 on success. On failure of InsertTerm() or Generator() control
 * goes to DefCall, which calls MesCall() and SETERROR(-1).
 *
 * Note: this routine only reads through the local pointer 'next'; it does
 * not assign fi->POfill.
 */
1202 WORD PF_Deferred(WORD *term, WORD level)
1203 {
1204  GETIDENTITY
1205  WORD *bra, *bstop;
1206  WORD *tstart;
1207  FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile;
1208  WORD *next = fi->POfill;
1209  WORD *termout = AT.WorkPointer;
1210  WORD *oldwork = AT.WorkPointer;
1211 
1212  AT.WorkPointer = (WORD *)((UBYTE *)(AT.WorkPointer) + AM.MaxTer);
1213  AR.DeferFlag = 0;
1214 
1215  PRINTFBUF("PF_Deferred (Term) ",term,*term);
1216  PRINTFBUF("PF_Deferred (Bracket)",PF_CurrentBracket,*PF_CurrentBracket);
1217 
/* bstop: end of the subterms of PF_CurrentBracket (coefficient excluded). */
1218  bra = bstop = PF_CurrentBracket;
1219  if ( *bstop > 0 ) {
1220  bstop += *bstop;
1221  bstop -= ABS(bstop[-1]);
1222  }
1223  bra++;
/* Walk the subterms until the HAAKJE subterm or the end is reached. */
1224  while ( *bra != HAAKJE && bra < bstop ) bra += bra[1];
1225  if ( bra >= bstop ) { /* No deferred action! */
1226  AT.WorkPointer = term + *term;
1227  if ( Generator(BHEAD term,level) ) goto DefCall;
1228  AR.DeferFlag = 1;
1229  AT.WorkPointer = oldwork;
1230  return(0);
1231  }
1232  bstop = bra;
1233  tstart = bra + bra[1];
1234  bra = PF_CurrentBracket;
/* Step back one word and overwrite it with the number of words from there */
/* to the end of PF_CurrentBracket, so that tstart points at a length-prefixed */
/* piece holding the bracket content. This modifies PF_CurrentBracket in place. */
1235  tstart--;
1236  *tstart = bra + *bra - tstart;
1237  bra++;
1238 /*
1239  Status of affairs:
1240  First bracket content starts at tstart.
1241  Next term starts at next.
1242  The outside of the bracket runs from bra = PF_CurrentBracket to bstop.
1243 */
1244  for(;;) {
1245  if ( InsertTerm(BHEAD term,0,AM.rbufnum,tstart,termout,0) < 0 ) {
1246  goto DefCall;
1247  }
1248 /*
1249  call Generator with new composed term
1250 */
1251  AT.WorkPointer = termout + *termout;
1252  if ( Generator(BHEAD termout,level) ) goto DefCall;
1253  AT.WorkPointer = termout;
/* Continue with the following term in the input buffer, if there is one. */
1254  tstart = next + 1;
1255  if ( tstart >= fi->POfull ) goto ThatsIt;
1256  next += *next;
1257 /*
1258  compare with current bracket
1259 */
1260  while ( bra <= bstop ) {
1261  if ( *bra != *tstart ) goto ThatsIt;
1262  bra++; tstart++;
1263  }
1264 /*
1265  now bra and tstart should both be a HAAKJE
1266 */
1267  bra--; tstart--;
1268  if ( *bra != HAAKJE || *tstart != HAAKJE ) goto ThatsIt;
/* Same trick as above, now inside the input buffer: the word before the */
/* bracket content of this term is overwritten with the remaining length. */
1269  tstart += tstart[1];
1270  tstart--;
1271  *tstart = next - tstart;
1272  bra = PF_CurrentBracket + 1;
1273  }
1274 
1275 ThatsIt:
1276 /*
1277  AT.WorkPointer = oldwork;
1278 */
1279  AR.DeferFlag = 1;
1280  return(0);
1281 DefCall:
1282  MesCall("PF_Deferred");
1283  SETERROR(-1);
1284 }
1285 
1286 /*
1287  #] PF_Deferred :
1288  #[ PF_Wait4Slave :
1289 */
1290 
1291 static LONG **PF_W4Sstats = 0;
1292 
1299 static int PF_Wait4Slave(int src)
1300 {
1301  int j, tag, next;
1302 
1303  tag = PF_ANY_MSGTAG;
1304  PF_CatchErrorMessages(&src, &tag);
1305  PF_Receive(src, tag, &next, &tag);
1306 
1307  if ( tag != PF_READY_MSGTAG ) {
1308  MesPrint("[%d] PF_Wait4Slave: received MSGTAG %d",(WORD)PF.me,(WORD)tag);
1309  return(-1);
1310  }
1311  if ( PF_W4Sstats == 0 ) {
1312  PF_W4Sstats = (LONG**)Malloc1(sizeof(LONG*),"");
1313  PF_W4Sstats[0] = (LONG*)Malloc1(PF_STATS_SIZE*sizeof(LONG),"");
1314  }
1315  PF_Unpack(PF_W4Sstats[0],PF_STATS_SIZE,PF_LONG);
1316  PF_Statistics(PF_W4Sstats,next);
1317 
1318  PF_Unpack(&j,1,PF_INT);
1319 
1320  if ( j ) {
1321 /*
1322  actions depending on rest of information in last message
1323 */
1324  }
1325  return(next);
1326 }
1327 
1328 /*
1329  #] PF_Wait4Slave :
1330  #[ PF_Wait4SlaveIP :
1331 */
1332 /*
1333  array of expression numbers for PF_InParallel processor.
1334  Each time the master sends expression "i" to the slave
1335  "next" it sets partodoexr[next]=i:
1336 */
1337 static WORD *partodoexr=NULL;
1338 
1346 static int PF_Wait4SlaveIP(int *src)
1347 {
1348  int j,tag,next;
1349 
1350  tag = PF_ANY_MSGTAG;
1351  PF_CatchErrorMessages(src, &tag);
1352  PF_Receive(*src, tag, &next, &tag);
1353  *src=tag;
1354  if ( PF_W4Sstats == 0 ) {
1355  PF_W4Sstats = (LONG**)Malloc1(sizeof(LONG*),"");
1356  PF_W4Sstats[0] = (LONG*)Malloc1(PF_STATS_SIZE*sizeof(LONG),"");
1357  }
1358 
1359  PF_Unpack(PF_W4Sstats[0],PF_STATS_SIZE,PF_LONG);
1360  if ( tag == PF_DATA_MSGTAG )
1361  AR.CurExpr = partodoexr[next];
1362  PF_Statistics(PF_W4Sstats,next);
1363 
1364  PF_Unpack(&j,1,PF_INT);
1365 
1366  if ( j ) {
1367  /* actions depending on rest of information in last message */
1368  }
1369 
1370  return(next);
1371 }
1372 /*
1373  #] PF_Wait4SlaveIP :
1374  #[ PF_WaitAllSlaves :
1375 */
1376 
1385 static int PF_WaitAllSlaves(void)
1386 {
1387  int i, readySlaves, tag, next = PF_ANY_SOURCE;
1388  UBYTE *has_sent = 0;
1389 
1390  has_sent = (UBYTE*)Malloc1(sizeof(UBYTE)*(PF.numtasks + 1),"PF_WaitAllSlaves");
1391  for ( i = 0; i < PF.numtasks; i++ ) has_sent[i] = 0;
1392 
1393  for ( readySlaves = 1; readySlaves < PF.numtasks; ) {
1394  if ( next != PF_ANY_SOURCE) { /*Go to the next slave:*/
1395  do{ /*Note, here readySlaves<PF.numtasks, so this loop can't be infinite*/
1396  if ( ++next >= PF.numtasks ) next = 1;
1397  } while ( has_sent[next] == 1 );
1398  }
1399 /*
1400  Here PF_ProbeWithCatchingErrorMessages() is BLOCKING function if next = PF_ANY_SOURCE:
1401 */
1402  tag = PF_ProbeWithCatchingErrorMessages(&next);
1403 /*
1404  Here next != PF_ANY_SOURCE
1405 */
1406  switch ( tag ) {
1407  case PF_BUFFER_MSGTAG:
1408  case PF_ENDBUFFER_MSGTAG:
1409 /*
1410  Slaves are ready to send their results back
1411 */
1412  if ( has_sent[next] == 0 ) {
1413  has_sent[next] = 1;
1414  readySlaves++;
1415  }
1416  else { /*error?*/
1417  fprintf(stderr,"ERROR next=%d tag=%d\n",next,tag);
1418  }
1419 /*
1420  Note, we do NOT read results here! Messages from these slaves will be read
1421  only after all slaves are ready, further in caller function
1422 */
1423  break;
1424  case 0:
1425 /*
1426  The slave is not ready. Just go to the next slave.
1427  It may appear that there are no more ready slaves, and the master
1428  will wait them in infinite loop. Stupid situation - the master can
1429  receive buffers from ready slaves!
1430 */
1431 #ifdef PF_WITH_SCHED_YIELD
1432 /*
1433  Relinquish the processor:
1434 */
1435  sched_yield();
1436 #endif
1437  break;
1438  case PF_DATA_MSGTAG:
1439  tag=next;
1440  next=PF_Wait4SlaveIP(&tag);
1441 /*
1442  tag must be == PF_DATA_MSGTAG!
1443 */
1444  PF_Statistics(PF_stats,0);
1445  PF_Slave2MasterIP(next);
1446  PF_Master2SlaveIP(next,NULL);
1447  if ( has_sent[next] == 0 ) {
1448  has_sent[next]=1;
1449  readySlaves++;
1450  }else{
1451  /*error?*/
1452  fprintf(stderr,"ERROR next=%d tag=%d\n",next,tag);
1453  }/*if ( has_sent[next] == 0 )*/
1454  break;
1455  case PF_EMPTY_MSGTAG:
1456  tag=next;
1457  next=PF_Wait4SlaveIP(&tag);
1458 /*
1459  tag must be == PF_EMPTY_MSGTAG!
1460 */
1461  PF_Master2SlaveIP(next,NULL);
1462  if ( has_sent[next] == 0 ) {
1463  has_sent[next]=1;
1464  readySlaves++;
1465  }else{
1466  /*error?*/
1467  fprintf(stderr,"ERROR next=%d tag=%d\n",next,tag);
1468  }/*if ( has_sent[next] == 0 )*/
1469  break;
1470  case PF_READY_MSGTAG:
1471 /*
1472  idle slave
1473  May be only PF_READY_MSGTAG:
1474 */
1475  next = PF_Wait4Slave(next);
1476  if ( next == -1 ) return(next); /*Cannot be!*/
1477  if ( has_sent[0] == 0 ) { /*Send the last chunk to the slave*/
1478  PF.sbuf->active = 0;
1479  has_sent[0] = 1;
1480  }
1481  else {
1482 /*
1483  Last chunk was sent, so just send to slave ENDSORT
1484  AN.ninterms must be sent because the slave expects it:
1485 */
1486  PACK_LONG(PF.sbuf->fill[next], AN.ninterms);
1487 /*
1488  This will tell to the slave that there are no more terms:
1489 */
1490  *(PF.sbuf->fill[next])++ = 0;
1491  PF.sbuf->active = next;
1492  }
1493 /*
1494  Send ENDSORT
1495 */
1496  PF_ISendSbuf(next,PF_ENDSORT_MSGTAG);
1497  break;
1498  default:
1499 /*
1500  Error?
1501  Indicates the error. This will force exit from the main loop:
1502 */
1503  MesPrint("!!!Unexpected MPI message src=%d tag=%d.", next, tag);
1504  readySlaves = PF.numtasks+1;
1505  break;
1506  }
1507  }
1508 
1509  if ( has_sent ) M_free(has_sent,"PF_WaitAllSlaves");
1510 /*
1511  0 on sucess (exit from the main loop by loop condition), or -1 if fails
1512  (exit from the main loop since readySlaves=PF.numtasks+1):
1513 */
1514  return(PF.numtasks-readySlaves);
1515 }
1516 
1517 /*
1518  #] PF_WaitAllSlaves :
1519  #[ PF_Processor :
1520 */
1521 
1534 int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression)
1535 {
1536  GETIDENTITY
1537  WORD *term = AT.WorkPointer;
1538  LONG dd = 0;
1539  PF_BUFFER *sb = PF.sbuf;
1540  WORD j, *s, next;
1541  LONG size, cpu;
1542  POSITION position;
1543  int k, src, tag;
1544  FILEHANDLE *oldoutfile = AR.outfile;
1545 
1546 #ifdef MPI2
1547  if ( PF_shared_buff == NULL ) {
1548  if ( PF_SMWin_Init() == 0 ) {
1549  MesPrint("PF_SMWin_Init error");
1550  exit(-1);
1551  }
1552  }
1553 #endif
1554 
1555  if ( ( (WORD *)(((UBYTE *)(AT.WorkPointer)) + AM.MaxTer ) ) > AT.WorkTop ) return(MesWork());
1556 
1557  /* For redefine statements. */
1558  if ( AC.numpfirstnum > 0 ) {
1559  for ( j = 0; j < AC.numpfirstnum; j++ ) {
1560  AC.inputnumbers[j] = -1;
1561  }
1562  }
1563 
1564  if ( AC.mparallelflag != PARALLELFLAG ) return(0);
1565 
1566  if ( PF.me == MASTER ) {
1567 /*
1568  #[ Master:
1569  #[ write prototype to outfile:
1570 */
1571  WORD oldBracketOn = AR.BracketOn;
1572  WORD *oldBrackBuf = AT.BrackBuf;
1573  WORD oldbracketindexflag = AT.bracketindexflag;
1574 
1575  LONG maxinterms; /* the maximum number of terms in the bucket */
1576  int cmaxinterms; /* a variable controling the transition of maxinterms */
1577  LONG termsinbucket; /* the number of filled terms in the bucket */
1578  LONG ProcessBucketSize = AC.mProcessBucketSize;
1579 
1580  if ( PF.log && AC.CModule >= PF.log )
1581  MesPrint("[%d] working on expression %s in module %l",PF.me,EXPRNAME(i),AC.CModule);
1582  if ( GetTerm(BHEAD term) <= 0 ) {
1583  MesPrint("[%d] Expression %d has problems in scratchfile",PF.me,i);
1584  return(-1);
1585  }
1586  term[3] = i;
1587  if ( AR.outtohide ) {
1588  SeekScratch(AR.hidefile,&position);
1589  e->onfile = position;
1590  if ( PutOut(BHEAD term,&position,AR.hidefile,0) < 0 ) return(-1);
1591  }
1592  else {
1593  SeekScratch(AR.outfile,&position);
1594  e->onfile = position;
1595  if ( PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) return(-1);
1596  }
1597  AR.DeferFlag = 0; /* The master leave the brackets!!! */
1598  AR.Eside = RHSIDE;
1599  if ( ( e->vflags & ISFACTORIZED ) != 0 ) {
1600  AR.BracketOn = 1;
1601  AT.BrackBuf = AM.BracketFactors;
1602  AT.bracketindexflag = 1;
1603  }
1604  if ( AT.bracketindexflag > 0 ) OpenBracketIndex(i);
1605 /*
1606  #] write prototype to outfile:
1607  #[ initialize sendbuffer if necessary:
1608 
1609  the size of the sendbufs is:
1610  MIN(1/PF.numtasks*(AT.SS->sBufsize+AT.SS->lBufsize),AR.infile->POsize)
1611  No allocation for extra buffers necessary, just make sb->buf... point
1612  to the right places in the sortbuffers.
1613 */
1614  NewSort(BHEAD0); /* we need AT.SS to be set for this!!! */
1615  if ( sb == 0 || sb->buff[0] != AT.SS->lBuffer ) {
1616  size = (LONG)((AT.SS->sTop2 - AT.SS->lBuffer)/(PF.numtasks));
1617  if ( size > (LONG)(AR.infile->POsize/sizeof(WORD) - 1) )
1618  size = AR.infile->POsize/sizeof(WORD) - 1;
1619  if ( sb == 0 ) {
1620  if ( ( sb = PF_AllocBuf(PF.numtasks,size*sizeof(WORD),PF.numtasks) ) == NULL )
1621  return(-1);
1622  }
1623  sb->buff[0] = AT.SS->lBuffer;
1624  sb->full[0] = sb->fill[0] = sb->buff[0];
1625  for ( j = 1; j < PF.numtasks; j++ ) {
1626  sb->stop[j-1] = sb->buff[j] = sb->buff[j-1] + size;
1627  }
1628  sb->stop[PF.numtasks-1] = sb->buff[PF.numtasks-1] + size;
1629  PF.sbuf = sb;
1630  }
1631  for ( j = 0; j < PF.numtasks; j++ ) {
1632  sb->full[j] = sb->fill[j] = sb->buff[j];
1633  }
1634 /*
1635  #] initialize sendbuffer if necessary:
1636  #[ loop for all terms in infile:
1637 */
1638  /*
1639  * The initial value of maxinterms is determined by the user given
1640  * ProcessBucketSize and the number of terms in the current expression.
1641  * We make the initial maxinterms smaller, so that we get the all
1642  * workers busy as soon as possible.
1643  */
1644  maxinterms = ProcessBucketSize / 100;
1645  if ( maxinterms > e->counter / (PF.numtasks - 1) / 4 )
1646  maxinterms = e->counter / (PF.numtasks - 1) / 4;
1647  if ( maxinterms < 1 ) maxinterms = 1;
1648  cmaxinterms = 0;
1649  /*
1650  * Copy them always to sb->buff[0]. When that is full, wait for
1651  * the next slave to accept terms, exchange sb->buff[0] and
1652  * sb->buff[next], send sb->buff[next] to next slave and go on
1653  * filling the now empty sb->buff[0].
1654  */
1655  AN.ninterms = 0;
1656  termsinbucket = 0;
1657  PACK_LONG(sb->fill[0], 1);
1658  while ( GetTerm(BHEAD term) ) {
1659  AN.ninterms++; dd = AN.deferskipped;
1660  if ( AC.CollectFun && *term <= (LONG)(AM.MaxTer/(2*sizeof(WORD))) ) {
1661  if ( GetMoreTerms(term) < 0 ) {
1662  LowerSortLevel(); return(-1);
1663  }
1664  }
1665  PRINTFBUF("PF_Processor gets",term,*term);
1666  if ( termsinbucket >= maxinterms || sb->fill[0] + *term >= sb->stop[0] ) {
1667  next = PF_Wait4Slave(PF_ANY_SOURCE);
1668 
1669  sb->fill[next] = sb->fill[0];
1670  sb->full[next] = sb->full[0];
1671  SWAP(sb->stop[next], sb->stop[0]);
1672  SWAP(sb->buff[next], sb->buff[0]);
1673  sb->fill[0] = sb->full[0] = sb->buff[0];
1674  sb->active = next;
1675 
1676 #ifdef MPI2
1677  if ( PF_Put_origin(next) == 0 ) {
1678  printf("PF_Put_origin error...\n");
1679  }
1680 #else
1681  PF_ISendSbuf(next,PF_TERM_MSGTAG);
1682 #endif
1683  /* Initialize the next bucket. */
1684  termsinbucket = 0;
1685  PACK_LONG(sb->fill[0], AN.ninterms);
1686  /*
1687  * For the "slow startup". We double maxinterms up to ProcessBucketSize
1688  * after (houpefully) the all workers got some terms.
1689  */
1690  if ( cmaxinterms >= PF.numtasks - 2 ) {
1691  maxinterms *= 2;
1692  if ( maxinterms >= ProcessBucketSize ) {
1693  cmaxinterms = -1;
1694  maxinterms = ProcessBucketSize;
1695  }
1696  }
1697  else if ( cmaxinterms >= 0 ) {
1698  cmaxinterms++;
1699  }
1700  }
1701  j = *(s = term);
1702  NCOPY(sb->fill[0], s, j);
1703  termsinbucket++;
1704  }
1705  /* NOTE: The last chunk will be sent to a slave at EndSort() => PF_EndSort()
1706  * => PF_WaitAllSlaves(). */
1707  AN.ninterms += dd;
1708 /*
1709  #] loop for all terms in infile:
1710  #[ Clean up & EndSort:
1711 */
1712  if ( LastExpression ) {
1713  UpdateMaxSize();
1714  if ( AR.infile->handle >= 0 ) {
1715  CloseFile(AR.infile->handle);
1716  AR.infile->handle = -1;
1717  remove(AR.infile->name);
1718  PUTZERO(AR.infile->POposition);
1719  }
1720  AR.infile->POfill = AR.infile->POfull = AR.infile->PObuffer;
1721  }
1722  if ( AR.outtohide ) AR.outfile = AR.hidefile;
1723  PF.parallel = 1;
1724  if ( EndSort(BHEAD AM.S0->sBuffer,0) < 0 ) return(-1);
1725  PF.parallel = 0;
1726  if ( AR.outtohide ) {
1727  AR.outfile = oldoutfile;
1728  AR.hidefile->POfull = AR.hidefile->POfill;
1729  }
1730  UpdateMaxSize();
1731  AR.BracketOn = oldBracketOn;
1732  AT.BrackBuf = oldBrackBuf;
1733  if ( ( e->vflags & TOBEFACTORED ) != 0 )
1734  poly_factorize_expression(e);
1735  else if ( ( ( e->vflags & TOBEUNFACTORED ) != 0 )
1736  && ( ( e->vflags & ISFACTORIZED ) != 0 ) )
1737  poly_unfactorize_expression(e);
1738  AT.bracketindexflag = oldbracketindexflag;
1739  AR.GetFile = 0;
1740  AR.outtohide = 0;
1741  /*
1742  * NOTE: e->numdummies, e->vflags and AR.exprflags will be updated
1743  * after gathering the information from all slaves.
1744  */
1745 /*
1746  #] Clean up & EndSort:
1747  #[ Collect (stats,prepro,...):
1748 */
1749  DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms));
1750  PF_CatchErrorMessagesForAll();
1751  e->numdummies = 0;
1752  for ( k = 1; k < PF.numtasks; k++ ) {
1753  PF_LongSingleReceive(PF_ANY_SOURCE, PF_ENDSORT_MSGTAG, &src, &tag);
1754  PF_LongSingleUnpack(PF_stats[src], PF_STATS_SIZE, PF_LONG);
1755  {
1756  WORD numdummies, expchanged;
1757  PF_LongSingleUnpack(&numdummies, 1, PF_WORD);
1758  PF_LongSingleUnpack(&expchanged, 1, PF_WORD);
1759  if ( e->numdummies < numdummies ) e->numdummies = numdummies;
1760  AR.expchanged |= expchanged;
1761  }
1762  /* Now handle redefined preprocessor variables. */
1763  if ( AC.numpfirstnum > 0 ) PF_UnpackRedefinedPreVars();
1764  }
1765  if ( ! AC.OldParallelStats ) {
1766  /* Now we can calculate AT.SS->GenTerms from the statistics of the slaves. */
1767  LONG genterms = 0;
1768  for ( k = 1; k < PF.numtasks; k++ ) {
1769  genterms += PF_stats[k][3];
1770  }
1771  AT.SS->GenTerms = genterms;
1772  WriteStats(&PF_exprsize, 2);
1773  }
1774  PF_Statistics(PF_stats,0);
1775 /*
1776  #] Collect (stats,prepro,...):
1777  #[ Update flags :
1778 */
1779  if ( AM.S0->TermsLeft ) e->vflags &= ~ISZERO;
1780  else e->vflags |= ISZERO;
1781  if ( AR.expchanged == 0 ) e->vflags |= ISUNMODIFIED;
1782  if ( AM.S0->TermsLeft ) AR.expflags |= ISZERO;
1783  if ( AR.expchanged ) AR.expflags |= ISUNMODIFIED;
1784 /*
1785  #] Update flags :
1786  #] Master:
1787 */
1788  }
1789  else {
1790 /*
1791  #[ Slave :
1792 */
1793 /*
1794  #[ Generator Loop & EndSort :
1795 
1796  loop for all terms to get from master, call Generator for each of them
1797  then call EndSort and do cleanup (to be implemented)
1798 */
1799  WORD oldBracketOn = AR.BracketOn;
1800  WORD *oldBrackBuf = AT.BrackBuf;
1801  WORD oldbracketindexflag = AT.bracketindexflag;
1802 
1803  /* For redefine statements. */
1804  if ( AC.numpfirstnum > 0 ) {
1805  for ( j = 0; j < AC.numpfirstnum; j++ ) {
1806  AC.inputnumbers[j] = -1;
1807  }
1808  }
1809 
1810  SeekScratch(AR.outfile,&position);
1811  e->onfile = position;
1812  AR.DeferFlag = AC.ComDefer;
1813  AR.Eside = RHSIDE;
1814  if ( ( e->vflags & ISFACTORIZED ) != 0 ) {
1815  AR.BracketOn = 1;
1816  AT.BrackBuf = AM.BracketFactors;
1817  AT.bracketindexflag = 1;
1818  }
1819  NewSort(BHEAD0);
1820  AR.MaxDum = AM.IndDum;
1821  AN.ninterms = 0;
1822  PF_linterms = 0;
1823  PF.parallel = 1;
1824 #ifdef MPI2
1825  AR.infile->POfull = AR.infile->POfill = AR.infile->PObuffer = PF_shared_buff;
1826 #endif
1827  {
1828  FILEHANDLE *fi = AC.RhsExprInModuleFlag && PF.rhsInParallel ? &PF.slavebuf : AR.infile;
1829  fi->POfull = fi->POfill = fi->PObuffer;
1830  }
1831  /* FIXME: AN.ninterms is still broken when AN.deferskipped is non-zero.
1832  * It still needs some work, also in PF_GetTerm(). (TU 30 Aug 2011) */
1833  while ( PF_GetTerm(term) ) {
1834  PF_linterms++; AN.ninterms++; dd = AN.deferskipped;
1835  AT.WorkPointer = term + *term;
1836  AN.RepPoint = AT.RepCount + 1;
1837  if ( ( e->vflags & ISFACTORIZED ) != 0 && term[1] == HAAKJE ) {
1838  StoreTerm(BHEAD term);
1839  continue;
1840  }
1841  if ( AR.DeferFlag ) {
1842  AR.CurDum = AN.IndDum = Expressions[AR.CurExpr].numdummies + AM.IndDum;
1843  }
1844  else {
1845  AN.IndDum = AM.IndDum;
1846  AR.CurDum = ReNumber(BHEAD term);
1847  }
1848  if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
1849  if ( AN.ncmod ) {
1850  if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
1851  else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
1852  }
1853  if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
1854  && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
1855  PolyFunClean(BHEAD term);
1856  }
1857  if ( Generator(BHEAD term,0) ) {
1858  MesPrint("[%d] PF_Processor: Error in Generator",PF.me);
1859  LowerSortLevel(); return(-1);
1860  }
1861  PF_linterms += dd; AN.ninterms += dd;
1862  }
1863  PF_linterms += dd; AN.ninterms += dd;
1864  {
1865  /*
1866  * EndSort() overrides AR.outfile->PObuffer etc. (See also PF_EndSort()),
1867  * but it causes a problem because
1868  * (1) PF_EndSort() sets AR.outfile->PObuffer to a send-buffer.
1869  * (2) RevertScratch() clears AR.infile, but then swaps buffers of AR.infile
1870  * and AR.outfile.
1871  * (3) RHS expressions are stored to AR.infile->PObuffer.
1872  * (4) Again, PF_EndSort() sets AR.outfile->PObuffer, but now AR.outfile->PObuffer
1873  * == AR.infile->PObuffer because of (1) and (2).
1874  * (5) The result goes to AR.outfile. This breaks the RHS expressions,
1875  * which may be needed for the next expression.
1876  * Solution: backup & restore AR.outfile->PObuffer etc. (TU 14 Sep 2011)
1877  */
1878  FILEHANDLE *fout = AR.outfile;
1879  WORD *oldbuff = fout->PObuffer;
1880  WORD *oldstop = fout->POstop;
1881  LONG oldsize = fout->POsize;
1882  if ( EndSort(BHEAD AM.S0->sBuffer, 0) < 0 ) return -1;
1883  fout->PObuffer = oldbuff;
1884  fout->POstop = oldstop;
1885  fout->POsize = oldsize;
1886  fout->POfill = fout->POfull = fout->PObuffer;
1887  }
1888  AR.BracketOn = oldBracketOn;
1889  AT.BrackBuf = oldBrackBuf;
1890  AT.bracketindexflag = oldbracketindexflag;
1891 /*
1892  #] Generator Loop & EndSort :
1893  #[ Collect (stats,prepro...) :
1894 */
1895  DBGOUT_NINTERMS(1, ("PF.me=%d AN.ninterms=%d PF_linterms=%d ENDSORT\n", (int)PF.me, (int)AN.ninterms, (int)PF_linterms));
1897  cpu = TimeCPU(1);
1898  size = 0;
1899  PF_LongSinglePack(&cpu, 1, PF_LONG);
1900  PF_LongSinglePack(&size, 1, PF_LONG);
1901  PF_LongSinglePack(&PF_linterms, 1, PF_LONG);
1902  PF_LongSinglePack(&AM.S0->GenTerms, 1, PF_LONG);
1903  PF_LongSinglePack(&AM.S0->TermsLeft, 1, PF_LONG);
1904  {
1905  WORD numdummies = AR.MaxDum - AM.IndDum;
1906  PF_LongSinglePack(&numdummies, 1, PF_WORD);
1907  PF_LongSinglePack(&AR.expchanged, 1, PF_WORD);
1908  }
1909  /* Now handle redefined preprocessor variables. */
1910  if ( AC.numpfirstnum > 0 ) PF_PackRedefinedPreVars();
1911  PF_LongSingleSend(MASTER, PF_ENDSORT_MSGTAG);
1912 /*
1913  #] Collect (stats,prepro...) :
1914 
1915  This operation is moved to the beginning of each block, see PreProcessor
1916  in pre.c.
1917 
1918  #] Slave :
1919 */
1920  if ( PF.log ) {
1921  UBYTE lbuf[24];
1922  NumToStr(lbuf,AC.CModule);
1923  fprintf(stderr,"[%d|%s] Endsort,Collect,Broadcast done\n",PF.me,lbuf);
1924  fflush(stderr);
1925  }
1926  }
1927  return(0);
1928 }
1929 
1930 /*
1931  #] PF_Processor :
1932  #] proces.c :
1933  #[ startup :, prepro & compile
1934  #[ PF_Init :
1935 */
1936 
1945 int PF_Init(int *argc, char ***argv)
1946 {
1947  UBYTE *fp, *ubp;
1948  char *c;
1949  int fpsize = 0;
1950 /*
1951  this should definitly be somewhere else ...
1952 */
1953  PF_CurrentBracket = 0;
1954 
1955  PF.numtasks = 0; /* number of tasks, is determined in PF_Lib_Init or must be set before! */
1956  PF.numsbufs = 2; /* might be changed by LibInit ! */
1957  PF.numrbufs = 2; /* might be changed by LibInit ! */
1958 
1959  PF_LibInit(argc,argv);
1960  PF_RealTime(PF_RESET);
1961 
1962  PF.log = 0;
1963  PF.parallel = 0;
1964  PF_statsinterval = 10;
1965  PF.rhsInParallel=1;
1966  PF.exprbufsize=4096;/*in WORDs*/
1967 
1968  if ( PF.me == MASTER ) {
1969 #ifdef PF_WITHGETENV
1970 /*
1971  get these from the environment at the moment sould be in setfile/tail
1972 */
1973  if ( ( c = getenv("PF_LOG") ) != 0 ) {
1974  if ( *c ) PF.log = (int)atoi(c);
1975  else PF.log = 1;
1976  fprintf(stderr,"[%d] changing PF.log to %d\n",PF.me,PF.log);
1977  fflush(stderr);
1978  }
1979  if ( ( c = (char*)getenv("PF_RBUFS") ) != 0 ) {
1980  PF.numrbufs = (int)atoi(c);
1981  fprintf(stderr,"[%d] changing numrbufs to: %d\n",PF.me,PF.numrbufs);
1982  fflush(stderr);
1983  }
1984  if ( ( c = (char*)getenv("PF_SBUFS") ) != 0 ) {
1985  PF.numsbufs = (int)atoi(c);
1986  fprintf(stderr,"[%d] changing numsbufs to: %d\n",PF.me,PF.numsbufs);
1987  fflush(stderr);
1988  }
1989  if ( PF.numsbufs > 10 ) PF.numsbufs = 10;
1990  if ( PF.numsbufs < 1 ) PF.numsbufs = 1;
1991  if ( PF.numrbufs > 2 ) PF.numrbufs = 2;
1992  if ( PF.numrbufs < 1 ) PF.numrbufs = 1;
1993 
1994  if ( ( c = getenv("PF_STATS") ) ) {
1995  UBYTE lbuf[24];
1996  PF_statsinterval = (int)atoi(c);
1997  NumToStr(lbuf,PF_statsinterval);
1998  fprintf(stderr,"[%d] changing PF_statsinterval to %s\n",PF.me,lbuf);
1999  fflush(stderr);
2000  if ( PF_statsinterval < 1 ) PF_statsinterval = 10;
2001  }
2002  fp = (UBYTE*)getenv("FORMPATH");
2003  if ( fp ) {
2004  ubp = fp;
2005  while ( *ubp++ ) fpsize++;
2006  if ( AC.OldParallelStats ) {
2007  fprintf(stderr,"[%d] changing Path to %s\n",PF.me,fp);
2008  fflush(stderr);
2009  }
2010  }
2011  else {
2012  fp = (UBYTE*)"";
2013  fpsize++;
2014  }
2015  fpsize++;
2016 #endif
2017  }
2018 /*
2019  #[ Broadcast settings from getenv: could also be done in PF_DoSetup
2020 */
2021  if ( PF.me == MASTER ) {
2022  PF_PreparePack();
2023  PF_Pack(&PF.log,1,PF_INT);
2024  PF_Pack(&PF.numrbufs,1,PF_WORD);
2025  PF_Pack(&PF.numsbufs,1,PF_WORD);
2026  PF_Pack(&fpsize,1,PF_INT);
2027  PF_Pack(fp,fpsize,PF_BYTE);
2028  }
2029  PF_Broadcast();
2030  if ( PF.me != MASTER ) {
2031  PF_Unpack(&PF.log,1,PF_INT);
2032  PF_Unpack(&PF.numrbufs,1,PF_WORD);
2033  PF_Unpack(&PF.numsbufs,1,PF_WORD);
2034  PF_Unpack(&fpsize,1,PF_INT);
2035  AM.Path = (UBYTE*)Malloc1(fpsize*sizeof(UBYTE),"Path");
2036  PF_Unpack(AM.Path,fpsize,PF_BYTE);
2037  if ( PF.log ) {
2038  fprintf(stderr, "[%d] log=%d rbufs=%d sbufs=%d path=%s\n",
2039  PF.me, PF.log, PF.numrbufs, PF.numsbufs, AM.Path);
2040  fflush(stderr);
2041  }
2042  }
2043 /*
2044  #] Broadcast settings from getenv:
2045 */
2046  return(0);
2047 }
2048 /*
2049  #] PF_Init :
2050  #[ PF_Terminate :
2051 */
2052 
/*
 * Shuts down the parallel layer by delegating to PF_LibTerminate().
 *
 * errorcode  passed through unchanged.
 *
 * Returns whatever PF_LibTerminate() returns.
 */
int PF_Terminate(int errorcode)
{
	int status;

	status = PF_LibTerminate(errorcode);
	return(status);
}
2064 
2065 /*
2066  #] PF_Terminate :
2067  #[ PF_GetSlaveTimes :
2068 */
2069 
2077 {
/*
 NOTE(review): the declarator line belonging to this body is not present
 in this listing; according to the fold markers around it, this is the
 body of PF_GetSlaveTimes.

 Every process other than the master contributes AM.SumTime + TimeCPU(1),
 the master contributes 0. The contributions are summed with MPI_Reduce
 (MPI_SUM) over PF_COMM, with MASTER as the root. MPI_Reduce stores the
 result only at the root, so on the other processes slavetimes keeps its
 initial value 0.
*/
2078  LONG slavetimes = 0;
2079  LONG t = PF.me == MASTER ? 0 : AM.SumTime + TimeCPU(1);
2080  MPI_Reduce(&t, &slavetimes, 1, PF_LONG, MPI_SUM, MASTER, PF_COMM);
2081  return slavetimes;
2082 }
2083 
2084 /*
2085  #] PF_GetSlaveTimes :
2086  #] startup :
2087  #[ PF_BroadcastNumber :
2088 */
2089 
2097 {
/*
 NOTE(review): the declarator line belonging to this body is not present
 in this listing; according to the fold markers around it, this is the
 body of PF_BroadcastNumber. The body uses a variable x that is sent as
 sizeof(LONG) bytes.

 x is handed to PF_Bcast() as a raw block of sizeof(LONG) bytes and is
 returned afterwards. With PF_DEBUG_BCAST_LONG defined, the master
 prints the value before the broadcast.
*/
2098 #ifdef PF_DEBUG_BCAST_LONG
2099  if ( PF.me == MASTER ) {
2100  MesPrint(">> Broadcast LONG: %l", x);
2101  }
2102 #endif
2103  PF_Bcast(&x, sizeof(LONG));
2104  return x;
2105 }
2106 
2107 /*
2108  #] PF_BroadcastNumber :
2109  #[ PF_BroadcastBuffer :
2110 */
2111 
2123 void PF_BroadcastBuffer(WORD **buffer, LONG *length)
2124 {
2125  WORD *p;
2126  LONG rest;
2127 #ifdef PF_DEBUG_BCAST_BUF
2128  if ( PF.me == MASTER ) {
2129  MesPrint(">> Broadcast Buffer: length=%l", *length);
2130  }
2131 #endif
2132  /* Initialize the buffer on the slaves. */
2133  if ( PF.me != MASTER ) {
2134  *buffer = NULL;
2135  }
2136  /* Broadcast the length of the buffer. */
2137  *length = PF_BroadcastNumber(*length);
2138  if ( *length <= 0 ) return;
2139  /* Allocate the buffer on the slaves. */
2140  if ( PF.me != MASTER ) {
2141  *buffer = (WORD *)Malloc1(*length * sizeof(WORD), "PF_BroadcastBuffer");
2142  }
2143  /* Broadcast the data in the buffer. */
2144  p = *buffer;
2145  rest = *length;
2146  while ( rest > 0 ) {
2147  int l = rest < (LONG)PF.exprbufsize ? (int)rest : PF.exprbufsize;
2148  PF_Bcast(p, l * sizeof(WORD));
2149  p += l;
2150  rest -= l;
2151  }
2152 }
2153 
2154 /*
2155  #] PF_BroadcastBuffer :
2156  #[ PF_BroadcastString :
2157 */
2158 
2165 int PF_BroadcastString(UBYTE *str)
2166 {
2167  int clength = 0;
2168 /*
2169  If string does not fit to the PF_buffer, it
2170  will be split into chanks. Next chank is started at str+clength
2171 */
2172  UBYTE *cstr=str;
2173 /*
2174  Note, compilation is performed INDEPENDENTLY on AC.mparallelflag!
2175  No if ( AC.mparallelflag == PARALLELFLAG ) !!
2176 */
2177  do {
2178  cstr += clength; /*at each step for all slaves and master */
2179 
2180  if ( MASTER == PF.me ) { /*Pack str*/
2181 /*
2182  initialize buffers
2183 */
2184  if ( PF_PreparePack() != 0 ) Terminate(-1);
2185  if ( ( clength = PF_PackString(cstr) ) <0 ) Terminate(-1);
2186  }
2187  PF_Broadcast();
2188 
2189  if ( MASTER != PF.me ) {
2190 /*
2191  Slave - unpack received string
2192  For slaves buffers are initialised automatically.
2193 */
2194  if ( ( clength = PF_UnpackString(cstr) ) < 0 ) Terminate(-1);
2195  }
2196  } while ( cstr[clength-1] != '\0' );
2197  return (0);
2198 }
2199 
2200 /*
2201  #] PF_BroadcastString :
2202  #[ PF_BroadcastPreDollar :
2203 */
2204 
/*
 * PF_BroadcastPreDollar: broadcasts the contents of a dollar variable
 * from the master to all other processes.
 *
 * dbuffer   master: address of the pointer to the data to send.
 *           Others: receives a buffer of (*newsize)+1 WORDs obtained
 *           from Malloc1() that is filled with the data.
 * newsize   master: size to send; (*newsize)+1 WORDs are transferred.
 *           Others: receives the value.
 * numterms  master: value to send. Others: receives the value.
 *
 * The data travel in pieces of PF_maxDollarChunkSize WORDs plus one
 * shorter remainder; the first message additionally carries numterms
 * and newsize. Master and receivers must perform the same number of
 * PF_Broadcast() calls, which is why the receiver treats the last full
 * chunk separately.
 *
 * Returns the bitwise OR of the results of the PF_* calls, with 4 OR-ed
 * in if the allocation on a receiver yields NULL. The result of the
 * PF_PreparePack() call inside the master's loop is not included.
 */
2220 int PF_BroadcastPreDollar(WORD **dbuffer, LONG *newsize, int *numterms)
2221 {
2222  int err = 0;
2223  LONG i;
2224 /*
2225  Note, compilation is performed INDEPENDENTLY on AC.mparallelflag!
2226  No if(AC.mparallelflag==PARALLELFLAG) !!
2227 */
2228  if ( MASTER == PF.me ) {
2229 /*
2230  The problem is that sometimes dollar variables are longer
2231  than PF_packbuf! So we split long expression into chunks.
2232  There are n filled chunks and one portially filled chunk:
2233 */
2234  LONG n = ((*newsize)+1)/PF_maxDollarChunkSize;
2235 /*
2236  ...and one more chunk for the rest; if the expression fits to
2237  the buffer without splitting, the latter will be the only one.
2238 
2239  PF_maxDollarChunkSize is the maximal number of items fitted to
2240  the buffer. It is calculated in PF_LibInit() in mpi.c.
2241  PF_maxDollarChunkSize is calculated for the first step, when
2242  two fields (numterms and newsize, see below) are already packed.
2243  For simplicity, this value is used also for all steps, in
2244  despite of it is a bit less than maximally available space.
2245 */
2246  WORD *thechunk = *dbuffer;
2247 
2248  err = PF_PreparePack(); /* initialize buffers */
2249  err |= PF_Pack(numterms,1,PF_INT);
2250  err |= PF_Pack(newsize,1,PF_LONG); /* pack the size */
2251 /*
2252  Pack and broadcast completely filled chunks.
2253  It may happen, this loop is not entered at all:
2254 */
2255  for ( i = 0; i < n; i++ ) {
2256  err |= PF_Pack(thechunk,PF_maxDollarChunkSize,PF_WORD);
2257  err |= PF_Broadcast();
2258  thechunk +=PF_maxDollarChunkSize;
2259  PF_PreparePack();
2260  }
2261 /*
2262  Pack and broadcast the rest:
2263 */
2264  if ( ( n = ( (*newsize)+1)%PF_maxDollarChunkSize ) != 0 ) {
2265  err |= PF_Pack(thechunk,n,PF_WORD);
2266  err |= PF_Broadcast();
2267  }
2268 #ifdef PF_DEBUG_BCAST_PREDOLLAR
2269  MesPrint(">> Broadcast PreDollar: newsize=%d numterms=%d", (int)*newsize, *numterms);
2270 #endif
2271  }
2272  if ( MASTER != PF.me ) { /* Slave - unpack received buffer */
2273  WORD *thechunk;
2274  LONG n, therest, thesize;
2275  err |= PF_Broadcast();
2276  err |=PF_Unpack(numterms,1,PF_INT);
2277  err |=PF_Unpack(newsize,1,PF_LONG);
2278 /*
2279  Now we know the buffer size.
2280 */
2281  thesize = (*newsize)+1;
2282 /*
2283  Evaluate the number of completely filled chunks. The last step must be
2284  treated separately, so -1:
2285 */
2286  n = (thesize/PF_maxDollarChunkSize) - 1;
2287 /*
2288  Note, here n can be <0, this is ok.
2289 */
2290  therest = thesize % PF_maxDollarChunkSize;
2291  thechunk = *dbuffer =
2292  (WORD*)Malloc1( thesize * sizeof(WORD),"$-buffer slave");
2293  if ( thechunk == NULL ) return(err|4);
2294 /*
2295  Unpack completely filled chunks and receive the next portion.
2296  It may happen, this loop is not entered at all:
2297 */
2298  for ( i = 0; i < n; i++ ) {
2299  err |= PF_Unpack(thechunk,PF_maxDollarChunkSize,PF_WORD);
2300  thechunk += PF_maxDollarChunkSize;
2301  err |= PF_Broadcast();
2302  }
2303 /*
2304  Now the last completely filled chunk:
2305 */
2306  if ( n >= 0 ) {
2307  err |= PF_Unpack(thechunk,PF_maxDollarChunkSize,PF_WORD);
2308  thechunk += PF_maxDollarChunkSize;
/* Only if a remainder follows has the master sent one more message. */
2309  if ( therest != 0 ) err |= PF_Broadcast();
2310  }
2311 /*
2312  Unpack the rest (it is already received!):
2313 */
2314  if ( therest != 0 ) err |= PF_Unpack(thechunk,therest,PF_WORD);
2315  }
2316  return (err);
2317 }
2318 
2319 /*
2320  #] PF_BroadcastPreDollar :
2321  #[ Synchronization of modified dollar variables :
2322  #[ Helper functions :
2323  #[ dollarlen :
2324 */
2325 
2329 static inline LONG dollarlen(const WORD *terms)
2330 {
2331  const WORD *p = terms;
2332  while ( *p ) p += *p;
2333  return p - terms; /* Not including the null terminator. */
2334 }
2335 
2336 /*
2337  #] dollarlen :
2338  #[ dollar_mod_type :
2339 */
2340 
2345 static inline WORD dollar_mod_type(WORD index)
2346 {
2347  int i;
2348  for ( i = 0; i < NumModOptdollars; i++ )
2349  if ( ModOptdollars[i].number == index ) break;
2350  if ( i >= NumModOptdollars ) return -1;
2351  return ModOptdollars[i].type;
2352 }
2353 
2354 
2355 /*
2356  #] dollar_mod_type :
2357  #] Helper functions :
2358  #[ PF_CollectModifiedDollars :
2359 */
2360 
2361 /*
2362  #[ dollar_to_be_collected :
2363 */
2364 
2369 static inline int dollar_to_be_collected(WORD index)
2370 {
2371  switch ( dollar_mod_type(index) ) {
2372  case MODSUM:
2373  case MODMAX:
2374  case MODMIN:
2375  return 1;
2376  default:
2377  return 0;
2378  }
2379 }
2380 
2381 /*
2382  #] dollar_to_be_collected :
2383  #[ copy_dollar :
2384 */
2385 
2390 static inline void copy_dollar(WORD index, WORD type, const WORD *where, LONG size)
2391 {
2392  DOLLARS d = Dollars + index;
2393 
2394  CleanDollarFactors(d);
2395 
2396  if ( type != DOLZERO && where != NULL && where != &AM.dollarzero && where[0] != 0 && size > 0 ) {
2397  if ( size > d->size || size < d->size / 4 ) { /* Reallocate if not enough or too much. */
2398  if ( d->where && d->where != &AM.dollarzero )
2399  M_free(d->where, "old content of dollar");
2400  d->where = Malloc1(sizeof(WORD) * size, "copy buffer to dollar");
2401  d->size = size;
2402  }
2403  d->type = type;
2404  WCOPY(d->where, where, size);
2405  }
2406  else {
2407  if ( d->where && d->where != &AM.dollarzero )
2408  M_free(d->where, "old content of dollar");
2409  d->type = DOLZERO;
2410  d->where = &AM.dollarzero;
2411  d->size = 0;
2412  }
2413 }
2414 
2415 /*
2416  #] copy_dollar :
2417  #[ compare_two_expressions :
2418 */
2419 
2424 static inline int compare_two_expressions(const WORD *e1, const WORD *e2)
2425 {
2426  GETIDENTITY
2427  /*
2428  * We consider the cases that
2429  * (1) the expression has no term,
2430  * (2) the expression has only one term and it is a number,
2431  * (3) otherwise.
2432  * Assume that the expressions are sorted and all terms are normalized.
2433  * The numerators of the coefficients must never be zero.
2434  *
2435  * Note that TwoExprCompare() is not adequate for our purpose
2436  * (as of 6 Aug. 2013), e.g., TwoExprCompare({0}, {4, 1, 1, -1}, LESS)
2437  * returns TRUE.
2438  */
2439  if ( e1[0] == 0 ) {
2440  if ( e2[0] == 0 ) {
2441  return(0);
2442  }
2443  else if ( e2[e2[0]] == 0 && e2[0] == ABS(e2[e2[0] - 1]) + 1 ) {
2444  if ( e2[e2[0] - 1] > 0 )
2445  return(-1);
2446  else
2447  return(+1);
2448  }
2449  }
2450  else if ( e1[e1[0]] == 0 && e1[0] == ABS(e1[e1[0] - 1]) + 1 ) {
2451  if ( e2[0] == 0 ) {
2452  if ( e1[e1[0] - 1] > 0 )
2453  return(+1);
2454  else
2455  return(-1);
2456  }
2457  else if ( e2[e2[0]] == 0 && e2[0] == ABS(e2[e2[0] - 1]) + 1 ) {
2458  return(CompCoef((WORD *)e1, (WORD *)e2));
2459  }
2460  }
2461  /* The expressions are not so simple. Define the order by each term. */
2462  while ( e1[0] && e2[0] ) {
2463  int c = CompareTerms(BHEAD (WORD *)e1, (WORD *)e2, 1);
2464  if ( c < 0 )
2465  return(-1);
2466  else if ( c > 0 )
2467  return(+1);
2468  e1 += e1[0];
2469  e2 += e2[0];
2470  }
2471  if ( e1[0] ) return(+1);
2472  if ( e2[0] ) return(-1);
2473  return(0);
2474 }
2475 
2476 /*
2477  #] compare_two_expressions :
2478  #[ Variables :
2479 */
2480 
2481 typedef struct { /* One receive slot on the master: a $-variable as sent by one slave. */
2482  VectorStruct(WORD) buf; /* The received terms, always closed by a zero WORD. */
2483  LONG size; /* Number of WORDs received plus one for the terminator; 0 for a DOLZERO variable. */
2484  WORD type; /* The type of the $-variable as unpacked from the slave's message. */
2485  PADPOINTER(1,0,1,0);
2486 } dollar_buf;
2487 
2488 /* Buffers used to store data for each variable from each slave. The slot of the nvars-th collected variable from the slave src sits at index (PF.numtasks-1)*nvars+(src-1). */
2489 static Vector(dollar_buf, dollar_slave_bufs);
2490 
2491 /*
2492  #] Variables :
2493 */
2494 
2509 {
2510  int i, j, ndollars;
2511  /*
2512  * If the current module was executed in the sequential mode,
2513  * there are no modified module on the slaves.
2514  */
2515  if ( AC.mparallelflag != PARALLELFLAG ) return 0;
2516  /*
2517  * Count the number of (potentially) modified dollar variables, which we need to collect.
2518  * Here we need to collect all max/min/sum variables.
2519  */
2520  ndollars = 0;
2521  for ( i = 0; i < NumPotModdollars; i++ ) {
2522  WORD index = PotModdollars[i];
2523  if ( dollar_to_be_collected(index) ) ndollars++;
2524  }
2525  if ( ndollars == 0 ) return 0; /* No dollars to be collected. */
2526 
2527  if ( PF.me == MASTER ) {
2528 /*
2529  #[ Master :
2530 */
2531  int nslaves, nvars;
2532  /* Prepare receive buffers. We need ndollars*(PF.numtasks-1) buffers. */
2533  int nbufs = ndollars * (PF.numtasks - 1);
2534  VectorReserve(dollar_slave_bufs, nbufs);
2535  for ( i = VectorSize(dollar_slave_bufs); i < nbufs; i++ ) {
2536  VectorInit(VectorPtr(dollar_slave_bufs)[i].buf);
2537  }
2538  VectorSize(dollar_slave_bufs) = nbufs;
2539  /* Receive data from each slave. */
2540  for ( nslaves = 1; nslaves < PF.numtasks; nslaves++ ) {
2541  int src;
2542  PF_LongSingleReceive(PF_ANY_SOURCE, PF_DOLLAR_MSGTAG, &src, NULL);
2543  nvars = 0;
2544  for ( i = 0; i < NumPotModdollars; i++ ) {
2545  WORD index = PotModdollars[i];
2546  dollar_buf *b;
2547  if ( !dollar_to_be_collected(index) ) continue;
2548  b = &VectorPtr(dollar_slave_bufs)[(PF.numtasks - 1) * nvars + (src - 1)];
2549  PF_LongSingleUnpack(&b->type, 1, PF_WORD);
2550  if ( b->type != DOLZERO ) {
2551  LONG size;
2552  WORD *where;
2553  PF_LongSingleUnpack(&size, 1, PF_LONG);
2554  VectorReserve(b->buf, size + 1);
2555  where = VectorPtr(b->buf);
2556  PF_LongSingleUnpack(where, size, PF_WORD);
2557  where[size] = 0; /* The null terminator is needed. */
2558  b->size = size + 1; /* Including the null terminator. */
2559  /* Note that we don't collect factored stuff for max/min/sum variables. */
2560  }
2561  else {
2562  VectorReserve(b->buf, 1);
2563  VectorPtr(b->buf)[0] = 0;
2564  b->size = 0;
2565  }
2566  nvars++;
2567  }
2568  }
2569  /*
2570  * Combine received dollars. The FORM reference manual says maximum/minimum/sum
2571  * $-variables must have a numerical value, however, this routine should work also
2572  * for non-numerical cases, although the maximum/minimum value for non-numerical
2573  * terms has ambiguity.
2574  */
2575  nvars = 0;
2576  for ( i = 0; i < NumPotModdollars; i++ ) {
2577  WORD index = PotModdollars[i];
2578  WORD dtype;
2579  DOLLARS d;
2580  dollar_buf *b;
2581  if ( !dollar_to_be_collected(index) ) continue;
2582  d = Dollars + index;
2583  b = &VectorPtr(dollar_slave_bufs)[(PF.numtasks - 1) * nvars];
2584  dtype = dollar_mod_type(index);
2585  switch ( dtype ) {
2586  case MODMAX:
2587  case MODMIN: {
2588 /*
2589  #[ MODMAX & MODMIN :
2590 */
2591  int selected = 0;
2592  for ( j = 1; j < PF.numtasks - 1; j++ ) {
2593  int c = compare_two_expressions(VectorPtr(b[j].buf), VectorPtr(b[selected].buf));
2594  if ( (dtype == MODMAX && c > 0) || (dtype == MODMIN && c < 0) )
2595  selected = j;
2596  }
2597  b = b + selected;
2598  copy_dollar(index, b->type, VectorPtr(b->buf), b->size);
2599 /*
2600  #] MODMAX & MODMIN :
2601 */
2602  break;
2603  }
2604  case MODSUM: {
2605 /*
2606  #[ MODSUM :
2607 */
2608  GETIDENTITY
2609  int err = 0;
2610 
2611  CBUF *C = cbuf + AM.rbufnum;
2612  WORD *oldwork = AT.WorkPointer, *oldcterm = AN.cTerm;
2613  WORD olddefer = AR.DeferFlag, oldnumlhs = AR.Cnumlhs, oldnumrhs = C->numrhs;
2614 
2615  LONG size;
2616  WORD type, *dbuf;
2617 
2618  AN.cTerm = 0;
2619  AR.DeferFlag = 0;
2620 
2621  if ( ((WORD *)((UBYTE *)AT.WorkPointer + AM.MaxTer)) > AT.WorkTop ) {
2622  err = -1;
2623  goto cleanup;
2624  MesWork();
2625  }
2626 
2627  if ( NewSort(BHEAD0) ) {
2628  err = -1;
2629  goto cleanup;
2630  }
2631  if ( NewSort(BHEAD0) ) {
2632  LowerSortLevel();
2633  err = -1;
2634  goto cleanup;
2635  }
2636 
2637  /*
2638  * Sum up the original $-variable in the master and $-variables on all slaves.
2639  * Note that $-variables on the slaves are set to zero at the beginning of
2640  * the module (See also DoExecute()).
2641  */
2642  for ( j = 0; j < PF.numtasks; j++ ) {
2643  const WORD *r;
2644  for ( r = j == 0 ? Dollars[index].where : VectorPtr(b[j - 1].buf); *r; r += *r ) {
2645  WCOPY(AT.WorkPointer, r, *r);
2646  AT.WorkPointer += *r;
2647  AR.Cnumlhs = 0;
2648  if ( Generator(BHEAD oldwork, 0) ) {
2650  err = -1;
2651  goto cleanup;
2652  }
2653  AT.WorkPointer = oldwork;
2654  }
2655  }
2656 
2657  size = EndSort(BHEAD (WORD *)&dbuf, 2);
2658  if ( size < 0 ) {
2659  LowerSortLevel();
2660  err = -1;
2661  goto cleanup;
2662  }
2663  LowerSortLevel();
2664 
2665  /* Find special cases. */
2666  type = DOLTERMS;
2667  if ( dbuf[0] == 0 ) {
2668  type = DOLZERO;
2669  }
2670  else if ( dbuf[dbuf[0]] == 0 ) {
2671  const WORD *t = dbuf, *w;
2672  WORD n, nsize;
2673  n = *t;
2674  nsize = t[n - 1];
2675  if ( nsize < 0 ) nsize = -nsize;
2676  if ( nsize == n - 1 ) {
2677  nsize = (nsize - 1) / 2;
2678  w = t + 1 + nsize;
2679  if ( *w == 1 ) {
2680  w++; while ( w < t + n - 1 ) { if ( *w ) break; w++; }
2681  if ( w >= t + n - 1 ) type = DOLNUMBER;
2682  }
2683  else if ( n == 7 && t[6] == 3 && t[5] == 1 && t[4] == 1 && t[1] == INDEX && t[2] == 3 ) {
2684  type = DOLINDEX;
2685  d->index = t[3];
2686  }
2687  }
2688  }
2689  copy_dollar(index, type, dbuf, dollarlen(dbuf) + 1);
2690  M_free(dbuf, "temporary dollar buffer");
2691 cleanup:
2692  AR.Cnumlhs = oldnumlhs;
2693  C->numrhs = oldnumrhs;
2694  AR.DeferFlag = olddefer;
2695  AN.cTerm = oldcterm;
2696  AT.WorkPointer = oldwork;
2697 
2698  if ( err ) return err;
2699 /*
2700  #] MODSUM :
2701 */
2702  break;
2703  }
2704  }
2705  if ( d->type == DOLTERMS )
2706  cbuf[AM.dbufnum].CanCommu[index] = numcommute(d->where, &cbuf[AM.dbufnum].NumTerms[index]);
2707  cbuf[AM.dbufnum].rhs[index] = d->where;
2708  nvars++;
2709 #ifdef PF_DEBUG_REDUCE_DOLLAR
2710  MesPrint("<< Reduce $-var: %s", AC.dollarnames->namebuffer + d->name);
2711 #endif
2712  }
2713 /*
2714  #] Master :
2715 */
2716  }
2717  else {
2718 /*
2719  #[ Slave :
2720 */
2722  /* Pack each variable. */
2723  for ( i = 0; i < NumPotModdollars; i++ ) {
2724  WORD index = PotModdollars[i];
2725  DOLLARS d;
2726  if ( !dollar_to_be_collected(index) ) continue;
2727  d = Dollars + index;
2728  PF_LongSinglePack(&d->type, 1, PF_WORD);
2729  if ( d->type != DOLZERO ) {
2730  /*
2731  * NOTE: d->size is the allocated buffer size for d->where in WORDs.
2732  * So dollarlen(d->where) can be < d->size-1. (TU 15 Dec 2011)
2733  */
2734  LONG size = dollarlen(d->where);
2735  PF_LongSinglePack(&size, 1, PF_LONG);
2736  PF_LongSinglePack(d->where, size, PF_WORD);
2737  /* Note that we don't collect factored stuff for max/min/sum variables. */
2738  }
2739  }
2740  PF_LongSingleSend(MASTER, PF_DOLLAR_MSGTAG);
2741 /*
2742  #] Slave :
2743 */
2744  }
2745  return 0;
2746 }
2747 
2748 /*
2749  #] PF_CollectModifiedDollars :
2750  #[ PF_BroadcastModifiedDollars :
2751 */
2752 
2753 /*
2754  #[ dollar_to_be_broadcast :
2755 */
2756 
2761 static inline int dollar_to_be_broadcast(WORD index)
2762 {
2763  switch ( dollar_mod_type(index) ) {
2764  case MODLOCAL:
2765  return 0;
2766  default:
2767  return 1;
2768  }
2769 }
2770 
2771 /*
2772  #] dollar_to_be_broadcast :
2773 */
2774 
2788 {
2789  int i, j, ndollars;
2790  /*
2791  * Count the number of (potentially) modified dollar variables, which we need to broadcast.
2792  * Here we need to broadcast all non-local variables.
2793  */
2794  ndollars = 0;
2795  for ( i = 0; i < NumPotModdollars; i++ ) {
2796  WORD index = PotModdollars[i];
2797  if ( dollar_to_be_broadcast(index) ) ndollars++;
2798  }
2799  if ( ndollars == 0 ) return 0; /* No dollars to be broadcast. */
2800 
2801  if ( PF.me == MASTER ) {
2802 /*
2803  #[ Master :
2804 */
2806  /* Pack each variable. */
2807  for ( i = 0; i < NumPotModdollars; i++ ) {
2808  WORD index = PotModdollars[i];
2809  DOLLARS d;
2810  if ( !dollar_to_be_broadcast(index) ) continue;
2811  d = Dollars + index;
2812  PF_LongMultiPack(&d->type, 1, PF_WORD);
2813  if ( d->type != DOLZERO ) {
2814  /*
2815  * NOTE: d->size is the allocated buffer size for d->where in WORDs.
2816  * So dollarlen(d->where) can be < d->size-1. (TU 15 Dec 2011)
2817  */
2818  LONG size = dollarlen(d->where);
2819  PF_LongMultiPack(&size, 1, PF_LONG);
2820  PF_LongMultiPack(d->where, size, PF_WORD);
2821  /* ...and the factored stuff. */
2822  PF_LongMultiPack(&d->nfactors, 1, PF_WORD);
2823  if ( d->nfactors > 1 ) {
2824  for ( j = 0; j < d->nfactors; j++ ) {
2825  FACDOLLAR *f = &d->factors[j];
2826  PF_LongMultiPack(&f->type, 1, PF_WORD);
2827  PF_LongMultiPack(&f->size, 1, PF_LONG);
2828  if ( f->size > 0 )
2829  PF_LongMultiPack(f->where, f->size, PF_WORD);
2830  else
2831  PF_LongMultiPack(&f->value, 1, PF_WORD);
2832  }
2833  }
2834  }
2835 #ifdef PF_DEBUG_BCAST_DOLLAR
2836  MesPrint(">> Broadcast $-var: %s", AC.dollarnames->namebuffer + d->name);
2837 #endif
2838  }
2839 /*
2840  #] Master :
2841 */
2842  }
2843  if ( PF_LongMultiBroadcast() ) return -1;
2844  if ( PF.me != MASTER ) {
2845 /*
2846  #[ Slave :
2847 */
2848  for ( i = 0; i < NumPotModdollars; i++ ) {
2849  WORD index = PotModdollars[i];
2850  DOLLARS d;
2851  if ( !dollar_to_be_broadcast(index) ) continue;
2852  d = Dollars + index;
2853  /* Clear the contents of the dollar variable. */
2854  if ( d->where && d->where != &AM.dollarzero )
2855  M_free(d->where, "old content of dollar");
2856  d->where = &AM.dollarzero;
2857  d->size = 0;
2858  CleanDollarFactors(d);
2859  /* Unpack and store the contents. */
2860  PF_LongMultiUnpack(&d->type, 1, PF_WORD);
2861  if ( d->type != DOLZERO ) {
2862  LONG size;
2863  PF_LongMultiUnpack(&size, 1, PF_LONG);
2864  d->size = size + 1;
2865  d->where = (WORD *)Malloc1(sizeof(WORD) * d->size, "dollar content");
2866  PF_LongMultiUnpack(d->where, size, PF_WORD);
2867  d->where[size] = 0; /* The null terminator is needed. */
2868  /* ...and the factored stuff. */
2869  PF_LongMultiUnpack(&d->nfactors, 1, PF_WORD);
2870  if ( d->nfactors > 1 ) {
2871  d->factors = (FACDOLLAR *)Malloc1(sizeof(FACDOLLAR) * d->nfactors, "dollar factored stuff");
2872  for ( j = 0; j < d->nfactors; j++ ) {
2873  FACDOLLAR *f = &d->factors[j];
2874  PF_LongMultiUnpack(&f->type, 1, PF_WORD);
2875  PF_LongMultiUnpack(&f->size, 1, PF_LONG);
2876  if ( f->size > 0 ) {
2877  f->where = (WORD *)Malloc1(sizeof(WORD) * (f->size + 1), "dollar factor content");
2878  PF_LongMultiUnpack(f->where, f->size, PF_WORD);
2879  f->where[f->size] = 0; /* The null terminator is needed. */
2880  f->value = 0;
2881  }
2882  else {
2883  f->where = NULL;
2884  PF_LongMultiUnpack(&f->value, 1, PF_WORD);
2885  }
2886  }
2887  }
2888  }
2889  if ( d->type == DOLTERMS )
2890  cbuf[AM.dbufnum].CanCommu[index] = numcommute(d->where, &cbuf[AM.dbufnum].NumTerms[index]);
2891  cbuf[AM.dbufnum].rhs[index] = d->where;
2892  }
2893 /*
2894  #] Slave :
2895 */
2896  }
2897  return 0;
2898 }
2899 
2900 /*
2901  #] PF_BroadcastModifiedDollars :
2902  #] Synchronization of modified dollar variables :
2903  #[ Synchronization of redefined preprocessor variables :
2904  #[ Variables :
2905 */
2906 
2907 /* A buffer used in receivers: it holds the null-terminated value of one preprocessor variable while that variable is being unpacked. */
2908 static Vector(UBYTE, prevarbuf);
2909 
2910 /*
2911  #] Variables :
2912  #[ PF_PackRedefinedPreVars :
2913 */
2914 
2923 static void PF_PackRedefinedPreVars(void)
2924 {
2925  int i;
2926  /* First, pack the number of redefined preprocessor variables. */
2927  int nredefs = 0;
2928  for ( i = 0; i < AC.numpfirstnum; i++ )
2929  if ( AC.inputnumbers[i] >= 0 ) nredefs++;
2930  PF_LongSinglePack(&nredefs, 1, PF_INT);
2931  /* Then, pack each variable. */
2932  for ( i = 0; i < AC.numpfirstnum; i++ )
2933  if ( AC.inputnumbers[i] >= 0) {
2934  WORD index = AC.pfirstnum[i];
2935  UBYTE *value = PreVar[index].value;
2936  int bytes = strlen((char *)value);
2937  PF_LongSinglePack(&index, 1, PF_WORD);
2938  PF_LongSinglePack(&bytes, 1, PF_INT);
2939  PF_LongSinglePack(value, bytes, PF_BYTE);
2940  PF_LongSinglePack(&AC.inputnumbers[i], 1, PF_LONG);
2941  }
2942 }
2943 
2944 /*
2945  #] PF_PackRedefinedPreVars :
2946  #[ PF_UnpackRedefinedPreVars :
2947 */
2948 
2958 static void PF_UnpackRedefinedPreVars(void)
2959 {
2960  int i, j;
2961  /* Unpack the number of redefined preprocessor variables. */
2962  int nredefs;
2963  PF_LongSingleUnpack(&nredefs, 1, PF_INT);
2964  if ( nredefs > 0 ) {
2965  /* Then unpack each variable. */
2966  for ( i = 0; i < nredefs; i++ ) {
2967  WORD index;
2968  int bytes;
2969  UBYTE *value;
2970  LONG inputnumber;
2971  PF_LongSingleUnpack(&index, 1, PF_WORD);
2972  PF_LongSingleUnpack(&bytes, 1, PF_INT);
2973  VectorReserve(prevarbuf, bytes + 1);
2974  value = VectorPtr(prevarbuf);
2975  PF_LongSingleUnpack(value, bytes, PF_BYTE);
2976  value[bytes] = '\0'; /* The null terminator is needed. */
2977  PF_LongSingleUnpack(&inputnumber, 1, PF_LONG);
2978  /* Put this variable if it must be updated. */
2979  for ( j = 0; j < AC.numpfirstnum; j++ )
2980  if ( AC.pfirstnum[j] == index ) break;
2981  if ( AC.inputnumbers[j] < inputnumber ) {
2982  AC.inputnumbers[j] = inputnumber;
2983  PutPreVar(PreVar[index].name, value, NULL, 1);
2984  }
2985  }
2986  }
2987 }
2988 
2989 /*
2990  #] PF_UnpackRedefinedPreVars :
2991  #[ PF_BroadcastRedefinedPreVars :
2992 */
2993 
3005 {
3006  /*
3007  * NOTE: Because the compilation is performed on the all processes
3008  * independently on AC.mparallelflag, we always have to broadcast redefined
3009  * preprocessor variables from the master to the all slaves.
3010  */
3011  if ( PF.me == MASTER ) {
3012 /*
3013  #[ Master :
3014 */
3015  int i, nredefs;
3017  /* First, pack the number of redefined preprocessor variables. */
3018  nredefs = 0;
3019  for ( i = 0; i < AC.numpfirstnum; i++ )
3020  if ( AC.inputnumbers[i] >= 0 ) nredefs++;
3021  PF_LongMultiPack(&nredefs, 1, PF_INT);
3022  /* Then, pack each variable. */
3023  for ( i = 0; i < AC.numpfirstnum; i++ )
3024  if ( AC.inputnumbers[i] >= 0) {
3025  WORD index = AC.pfirstnum[i];
3026  UBYTE *value = PreVar[index].value;
3027  int bytes = strlen((char *)value);
3028  PF_LongMultiPack(&index, 1, PF_WORD);
3029  PF_LongMultiPack(&bytes, 1, PF_INT);
3030  PF_LongMultiPack(value, bytes, PF_BYTE);
3031 #ifdef PF_DEBUG_BCAST_PREVAR
3032  MesPrint(">> Broadcast PreVar: %s = \"%s\"", PreVar[index].name, value);
3033 #endif
3034  }
3035 /*
3036  #] Master :
3037 */
3038  }
3039  if ( PF_LongMultiBroadcast() ) return -1;
3040  if ( PF.me != MASTER ) {
3041 /*
3042  #[ Slave :
3043 */
3044  int i, nredefs;
3045  /* Unpack the number of redefined preprocessor variables. */
3046  PF_LongMultiUnpack(&nredefs, 1, PF_INT);
3047  if ( nredefs > 0 ) {
3048  /* Then unpack each variable and put it. */
3049  for ( i = 0; i < nredefs; i++ ) {
3050  WORD index;
3051  int bytes;
3052  UBYTE *value;
3053  PF_LongMultiUnpack(&index, 1, PF_WORD);
3054  PF_LongMultiUnpack(&bytes, 1, PF_INT);
3055  VectorReserve(prevarbuf, bytes + 1);
3056  value = VectorPtr(prevarbuf);
3057  PF_LongMultiUnpack(value, bytes, PF_BYTE);
3058  value[bytes] = '\0'; /* The null terminator is needed. */
3059  PutPreVar(PreVar[index].name, value, NULL, 1);
3060  }
3061  }
3062 /*
3063  #] Slave :
3064 */
3065  }
3066  return 0;
3067 }
3068 
3069 /*
3070  #] PF_BroadcastRedefinedPreVars :
3071  #] Synchronization of redefined preprocessor variables :
3072  #[ Preprocessor Inside instruction :
3073  #[ Variables :
3074 */
3075 
3076 /* Saved values of AC.RhsExprInModuleFlag, PotModdollars and AC.pfirstnum. They are written by PF_StoreInsideInfo() and copied back by PF_RestoreInsideInfo(). */
3077 static WORD oldRhsExprInModuleFlag; /* Copy of AC.RhsExprInModuleFlag. */
3078 static Vector(WORD, oldPotModdollars); /* Copy of PotModdollars[0..NumPotModdollars-1]. */
3079 static Vector(WORD, oldpfirstnum); /* Copy of AC.pfirstnum[0..AC.numpfirstnum-1]. */
3080 
3081 /*
3082  #] Variables :
3083  #[ PF_StoreInsideInfo :
3084 */
3085 
3086 /*
3087  * Saves the current values of AC.RhsExprInModuleFlag, PotModdollars
3088  * and AC.pfirstnum.
3089  *
3090  * Called by DoInside().
3091  *
3092  * @return 0 if OK, nonzero on error.
3093  */
3094 int PF_StoreInsideInfo(void)
3095 {
3096  int i;
3097  oldRhsExprInModuleFlag = AC.RhsExprInModuleFlag;
3098  VectorClear(oldPotModdollars);
3099  for ( i = 0; i < NumPotModdollars; i++ )
3100  VectorPushBack(oldPotModdollars, PotModdollars[i]);
3101  VectorClear(oldpfirstnum);
3102  for ( i = 0; i < AC.numpfirstnum; i++ )
3103  VectorPushBack(oldpfirstnum, AC.pfirstnum[i]);
3104  return 0;
3105 }
3106 
3107 /*
3108  #] PF_StoreInsideInfo :
3109  #[ PF_RestoreInsideInfo :
3110 */
3111 
3112 /*
3113  * Restores the saved values of AC.RhsExprInModuleFlag, PotModdollars
3114  * and AC.pfirstnum.
3115  *
3116  * Called by DoEndInside().
3117  *
3118  * @return 0 if OK, nonzero on error.
3119  */
3120 int PF_RestoreInsideInfo(void)
3121 {
3122  int i;
3123  AC.RhsExprInModuleFlag = oldRhsExprInModuleFlag;
3124  NumPotModdollars = VectorSize(oldPotModdollars);
3125  for ( i = 0; i < NumPotModdollars; i++ )
3126  PotModdollars[i] = VectorPtr(oldPotModdollars)[i];
3127  AC.numpfirstnum = VectorSize(oldpfirstnum);
3128  for ( i = 0; i < AC.numpfirstnum; i++ )
3129  AC.pfirstnum[i] = VectorPtr(oldpfirstnum)[i];
3130  return 0;
3131 }
3132 
3133 /*
3134  #] PF_RestoreInsideInfo :
3135  #] Preprocessor Inside instruction :
3136  #[ PF_BroadcastCBuf :
3137 */
3138 
3146 int PF_BroadcastCBuf(int bufnum)
3147 {
3148  CBUF *C = cbuf + bufnum;
3149  int i;
3150  LONG l;
3151  if ( PF.me == MASTER ) {
3152 /*
3153  #[ Master :
3154 */
3156  /* Pack CBUF struct except pointers. */
3157  PF_LongMultiPack(&C->BufferSize, 1, PF_LONG);
3158  PF_LongMultiPack(&C->numlhs, 1, PF_INT);
3159  PF_LongMultiPack(&C->numrhs, 1, PF_INT);
3160  PF_LongMultiPack(&C->maxlhs, 1, PF_INT);
3161  PF_LongMultiPack(&C->maxrhs, 1, PF_INT);
3162  PF_LongMultiPack(&C->mnumlhs, 1, PF_INT);
3163  PF_LongMultiPack(&C->mnumrhs, 1, PF_INT);
3164  PF_LongMultiPack(&C->numtree, 1, PF_INT);
3165  PF_LongMultiPack(&C->rootnum, 1, PF_INT);
3166  PF_LongMultiPack(&C->MaxTreeSize, 1, PF_INT);
3167  /* Now pointers. Pointer, lhs and rhs are packed as offsets. We don't pack Top. */
3168  l = C->Pointer - C->Buffer;
3169  PF_LongMultiPack(&l, 1, PF_LONG);
3170  PF_LongMultiPack(C->Buffer, l, PF_WORD);
3171  for ( i = 0; i < C->numlhs + 1; i++ ) {
3172  l = C->lhs[i] - C->Buffer;
3173  PF_LongMultiPack(&l, 1, PF_LONG);
3174  }
3175  for ( i = 0; i < C->numrhs + 1; i++ ) {
3176  l = C->rhs[i] - C->Buffer;
3177  PF_LongMultiPack(&l, 1, PF_LONG);
3178  }
3179  PF_LongMultiPack(C->CanCommu, C->maxrhs + 1, PF_LONG);
3180  PF_LongMultiPack(C->NumTerms, C->maxrhs + 1, PF_LONG);
3181  PF_LongMultiPack(C->numdum, C->maxrhs + 1, PF_WORD);
3182  PF_LongMultiPack(C->dimension, C->maxrhs + 1, PF_WORD);
3183  if ( C->MaxTreeSize > 0 )
3184  PF_LongMultiPack(C->boomlijst, (C->numtree + 1) * (sizeof(COMPTREE) / sizeof(int)), PF_INT);
3185 #ifdef PF_DEBUG_BCAST_CBUF
3186  MesPrint(">> Broadcast CBuf %d", bufnum);
3187 #endif
3188 /*
3189  #] Master :
3190 */
3191  }
3192  if ( PF_LongMultiBroadcast() ) return -1;
3193  if ( PF.me != MASTER ) {
3194 /*
3195  #[ Slave :
3196 */
3197  /* First, free already allocated buffers. */
3198  finishcbuf(bufnum);
3199  /* Unpack CBUF struct except pointers. */
3200  PF_LongMultiUnpack(&C->BufferSize, 1, PF_LONG);
3201  PF_LongMultiUnpack(&C->numlhs, 1, PF_INT);
3202  PF_LongMultiUnpack(&C->numrhs, 1, PF_INT);
3203  PF_LongMultiUnpack(&C->maxlhs, 1, PF_INT);
3204  PF_LongMultiUnpack(&C->maxrhs, 1, PF_INT);
3205  PF_LongMultiUnpack(&C->mnumlhs, 1, PF_INT);
3206  PF_LongMultiUnpack(&C->mnumrhs, 1, PF_INT);
3207  PF_LongMultiUnpack(&C->numtree, 1, PF_INT);
3208  PF_LongMultiUnpack(&C->rootnum, 1, PF_INT);
3209  PF_LongMultiUnpack(&C->MaxTreeSize, 1, PF_INT);
3210  /* Allocate new buffers. */
3211  C->Buffer = (WORD *)Malloc1(C->BufferSize * sizeof(WORD), "compiler buffer");
3212  C->Top = C->Buffer + C->BufferSize;
3213  C->lhs = (WORD **)Malloc1(C->maxlhs * sizeof(WORD *), "compiler buffer");
3214  C->rhs = (WORD **)Malloc1(C->maxrhs * (sizeof(WORD *) + 2 * sizeof(LONG) + 2 * sizeof(WORD)), "compiler buffer");
3215  C->CanCommu = (LONG *)(C->rhs + C->maxrhs);
3216  C->NumTerms = C->CanCommu + C->maxrhs;
3217  C->numdum = (WORD *)(C->NumTerms + C->maxrhs);
3218  C->dimension = C->numdum + C->maxrhs;
3219  if ( C->MaxTreeSize > 0 )
3220  C->boomlijst = (COMPTREE *)Malloc1(C->MaxTreeSize * sizeof(COMPTREE), "compiler buffer");
3221  /* Unpack buffers. */
3222  PF_LongMultiUnpack(&l, 1, PF_LONG);
3223  PF_LongMultiUnpack(C->Buffer, l, PF_WORD);
3224  C->Pointer = C->Buffer + l;
3225  for ( i = 0; i < C->numlhs + 1; i++ ) {
3226  PF_LongMultiUnpack(&l, 1, PF_LONG);
3227  C->lhs[i] = C->Buffer + l;
3228  }
3229  for ( i = 0; i < C->numrhs + 1; i++ ) {
3230  PF_LongMultiUnpack(&l, 1, PF_LONG);
3231  C->rhs[i] = C->Buffer + l;
3232  }
3233  PF_LongMultiUnpack(C->CanCommu, C->maxrhs + 1, PF_LONG);
3234  PF_LongMultiUnpack(C->NumTerms, C->maxrhs + 1, PF_LONG);
3235  PF_LongMultiUnpack(C->numdum, C->maxrhs + 1, PF_WORD);
3236  PF_LongMultiUnpack(C->dimension, C->maxrhs + 1, PF_WORD);
3237  if ( C->MaxTreeSize > 0 )
3238  PF_LongMultiUnpack(C->boomlijst, (C->numtree + 1) * (sizeof(COMPTREE) / sizeof(int)), PF_INT);
3239 /*
3240  #] Slave :
3241 */
3242  }
3243  return 0;
3244 }
3245 
3246 /*
3247  #] PF_BroadcastCBuf :
3248  #[ PF_BroadcastExpFlags :
3249 */
3250 
3258 {
3259  WORD i;
3260  EXPRESSIONS e;
3261  if ( PF.me == MASTER ) {
3262 /*
3263  #[ Master :
3264 */
3266  PF_LongMultiPack(&AR.expflags, 1, PF_WORD);
3267  for ( i = 0; i < NumExpressions; i++ ) {
3268  e = &Expressions[i];
3269  PF_LongMultiPack(&e->counter, 1, PF_WORD);
3270  PF_LongMultiPack(&e->vflags, 1, PF_WORD);
3271  PF_LongMultiPack(&e->numdummies, 1, PF_WORD);
3272  PF_LongMultiPack(&e->numfactors, 1, PF_WORD);
3273 #ifdef PF_DEBUG_BCAST_EXPRFLAGS
3274  MesPrint(">> Broadcast ExprFlags: %s", AC.exprnames->namebuffer + e->name);
3275 #endif
3276  }
3277 /*
3278  #] Master :
3279 */
3280  }
3281  if ( PF_LongMultiBroadcast() ) return -1;
3282  if ( PF.me != MASTER ) {
3283 /*
3284  #[ Slave :
3285 */
3286  PF_LongMultiUnpack(&AR.expflags, 1, PF_WORD);
3287  for ( i = 0; i < NumExpressions; i++ ) {
3288  e = &Expressions[i];
3289  PF_LongMultiUnpack(&e->counter, 1, PF_WORD);
3290  PF_LongMultiUnpack(&e->vflags, 1, PF_WORD);
3291  PF_LongMultiUnpack(&e->numdummies, 1, PF_WORD);
3292  PF_LongMultiUnpack(&e->numfactors, 1, PF_WORD);
3293  }
3294 /*
3295  #] Slave :
3296 */
3297  }
3298  return 0;
3299 }
3300 
3301 /*
3302  #] PF_BroadcastExpFlags :
3303  #[ PF_SetScratch :
3304 */
3305 
3312 static void PF_SetScratch(FILEHANDLE *f,POSITION *position)
3313 {
3314  if(
3315  ( f->handle >= 0) && ISGEPOS(*position,f->POposition) &&
3316  ( ISGEPOSINC(*position,f->POposition,(f->POfull-f->PObuffer)*sizeof(WORD)) ==0 )
3317  )/*position is inside the buffer! SetScratch() will do nothing.*/
3318  f->POfull=f->PObuffer;/*force SetScratch() to re-read the position from the beginning:*/
3319  SetScratch(f,position);
3320 }
3321 
3322 /*
3323  #] PF_SetScratch :
3324  #[ PF_pushScratch :
3325 */
3326 
3333 static int PF_pushScratch(FILEHANDLE *f)
3334 {
3335  LONG size,RetCode;
3336  if ( f->handle < 0){
3337  /*Create the file*/
3338  if ( ( RetCode = CreateFile(f->name) ) >= 0 ) {
3339  f->handle = (WORD)RetCode;
3340  PUTZERO(f->filesize);
3341  PUTZERO(f->POposition);
3342  }
3343  else{
3344  MesPrint("Cannot create scratch file %s",f->name);
3345  return(-1);
3346  }
3347  }/*if ( f->handle < 0)*/
3348  size = (f->POfill-f->PObuffer)*sizeof(WORD);
3349  if( size > 0 ){
3350  SeekFile(f->handle,&(f->POposition),SEEK_SET);
3351  if ( WriteFile(f->handle,(UBYTE *)(f->PObuffer),size) != size ){
3352  MesPrint("Error while writing to disk. Disk full?");
3353  return(-1);
3354  }
3355  ADDPOS(f->filesize,size);
3356  ADDPOS(f->POposition,size);
3357  f->POfill = f->POfull=f->PObuffer;
3358  }/*if( size > 0 )*/
3359  return(0);
3360 }
3361 
3362 /*
3363  #] PF_pushScratch :
3364  #[ Broadcasting RHS expressions :
3365  #[ PF_WalkThroughExprMaster :
3366  Returns <=0 if the expression is ready, or dl+1;
3367 */
3368 
/*
 * Walks through the terms of an expression in the buffer of curfile,
 * starting with a step of dl WORDs and then term by term, until
 * PF.exprbufsize WORDs have been passed or the end of the expression
 * (a zero WORD) is found. curfile->POfill is advanced accordingly.
 *
 * Returns 0 if exactly PF.exprbufsize WORDs have been passed and the next
 * WORD is zero; a negative value (the number of passed WORDs minus
 * PF.exprbufsize) if the zero is found earlier; otherwise the number of
 * WORDs by which the last term exceeds PF.exprbufsize, plus 1. In the last
 * case POfill is stepped back by that excess.
 */
3369 static int PF_WalkThroughExprMaster(FILEHANDLE *curfile, int dl)
3370 {
3371  LONG l=0; /* the number of WORDs passed in this call */
3372  for(;;){
3373  if(curfile->POfull-curfile->POfill < dl){ /* fewer than dl WORDs left in the buffer: reposition it at the current position */
3374  POSITION pos;
3375  SeekScratch(curfile,&pos);
3376  PF_SetScratch(curfile,&pos);
3377  }/*if(curfile->POfull-curfile->POfill < dl)*/
3378  curfile->POfill+=dl;
3379  l+=dl;
3380  if( l >= PF.exprbufsize){
3381  if( l == PF.exprbufsize){
3382  if( *(curfile->POfill) == 0)/*expression is ready*/
3383  return(0);
3384  }
3385  l-=PF.exprbufsize; /* now l is the excess over PF.exprbufsize */
3386  curfile->POfill-=l; /* step back by the excess */
3387  return l+1;
3388  }
3389 
3390  dl=*(curfile->POfill); /* the first WORD of the next term */
3391  if(dl == 0) /* the end of the expression; l < PF.exprbufsize here, so the result is negative */
3392  return l-PF.exprbufsize;
3393 
3394  if(dl<0){/*compressed term*/
3395  if(curfile->POfull-curfile->POfill < 1){
3396  POSITION pos;
3397  SeekScratch(curfile,&pos);
3398  PF_SetScratch(curfile,&pos);
3399  }/*if(curfile->POfull-curfile->POfill < 1)*/
3400  dl=*(curfile->POfill+1)+2; /* the step is taken from the second WORD, plus 2 */
3401  }/*if(*(curfile->POfill)<0)*/
3402  }/*for(;;)*/
3403 }
3404 
3405 /*
3406  #] PF_WalkThroughExprMaster :
3407  #[ PF_WalkThroughExprSlave :
3408  Returns <=0 if the expression is ready, or dl+1;
3409 */
3410 
/*
 * The counterpart of PF_WalkThroughExprMaster() on the receiving side.
 * Walks through the received terms in the buffer of curfile, starting
 * with a step of dl WORDs and then term by term, until PF.exprbufsize
 * WORDs have been passed or the end of the expression (a zero WORD) is
 * found. curfile->POfill is advanced and curfile->POfull follows it.
 * *counter is incremented for each term whose first WORD is read here.
 * When a step does not fit before curfile->POstop, the buffer is flushed
 * by PF_pushScratch().
 *
 * Returns 0 if exactly PF.exprbufsize WORDs have been passed and the next
 * WORD is zero; a negative value (the number of passed WORDs minus
 * PF.exprbufsize) if the zero is found earlier; -PF.exprbufsize-1 if
 * PF_pushScratch() fails; otherwise the number of WORDs by which the last
 * term exceeds PF.exprbufsize, plus 1. In the last case POfill is stepped
 * back by that excess.
 */
3411 static int PF_WalkThroughExprSlave(FILEHANDLE *curfile, LONG *counter, int dl)
3412 {
3413  LONG l=0; /* the number of WORDs passed in this call */
3414  for(;;){
3415  if(curfile->POstop-curfile->POfill < dl){ /* no room for dl WORDs: flush the buffer */
3416  if(PF_pushScratch(curfile))
3417  return(-PF.exprbufsize-1); /* error code, distinguishable from the regular negative results */
3418  }
3419  curfile->POfill+=dl;
3420  curfile->POfull=curfile->POfill;
3421  l+=dl;
3422  if( l >= PF.exprbufsize){
3423  if( l == PF.exprbufsize){
3424  /*
3425  * This access is valid because PF.exprbufsize+1 WORDs are
3426  * broadcasted, this shortcut is not mandatory though. (TU 15 Sep 2011)
3427  */
3428  if( *(curfile->POfill) == 0)/*expression is ready*/
3429  return(0);
3430  }
3431  l-=PF.exprbufsize; /* now l is the excess over PF.exprbufsize */
3432  curfile->POfill-=l; /* step back by the excess */
3433  curfile->POfull=curfile->POfill;
3434  return l+1;
3435  }
3436 
3437  dl=*(curfile->POfill); /* the first WORD of the next term */
3438  if(dl == 0) /* the end of the expression; l < PF.exprbufsize here, so the result is negative */
3439  return l-PF.exprbufsize;
3440  (*counter)++;
3441  if(dl<0){/*compressed term*/
3442  if(curfile->POstop-curfile->POfill < 1){
3443  if(PF_pushScratch(curfile))
3444  return(-PF.exprbufsize-1);
3445  }
3446  /*
3447  * This access is always valid because PF.exprbufsize+1 WORDs are
3448  * broadcasted. (TU 15 Sep 2011)
3449  */
3450  dl=*(curfile->POfill+1)+2; /* the step is taken from the second WORD, plus 2 */
3451  }/*if(*(curfile->POfill)<0)*/
3452  }/*for(;;)*/
3453 }
3454 
3455 /*
3456  #] PF_WalkThroughExprSlave :
3457  #[ PF_rhsBCastMaster :
3458 */
3459 
3467 static int PF_rhsBCastMaster(FILEHANDLE *curfile, EXPRESSIONS e)
3468 {
3469  LONG l=1;/*PF_WalkThroughExpr returns length + 1*/
3470  SetScratch(curfile,&(e->onfile));
3471  do{
3472  /*
3473  * We need to broadcast PF.exprbufsize+1 WORDs because PF_WalkThroughExprSlave
3474  * may access to an additional 1 WORD. It is better to rewrite the routines
3475  * in such a way as to broadcast only PF.exprbufsize WORDs. (TU 15 Sep 2011)
3476  */
3477  if ( curfile->POfull - curfile->POfill < PF.exprbufsize + 1 ) {
3478  POSITION pos;
3479  SeekScratch(curfile,&pos);
3480  PF_SetScratch(curfile,&pos);
3481  }
3482  if ( PF_Bcast(curfile->POfill, (PF.exprbufsize + 1) * sizeof(WORD)) )
3483  return -1;
3484  l=PF_WalkThroughExprMaster(curfile,l-1);
3485  }while(l>0);
3486  if(l<0)/*The tail is extra, decrease POfill*/
3487  curfile->POfill-=l;
3488  return(0);
3489 }
3490 
3491 /*
3492  #] PF_rhsBCastMaster :
3493  #[ PF_rhsBCastSlave :
3494 */
3495 
3504 static int PF_rhsBCastSlave(FILEHANDLE *curfile, EXPRESSIONS e)
3505 {
3506  LONG l=1;/*PF_WalkThroughExpr returns length + 1*/
3507  LONG counter = 0;
3508  do{
3509  /*
3510  * We need to broadcast PF.exprbufsize+1 WORDs because PF_WalkThroughExprSlave
3511  * may access to an additional 1 WORD. It is better to rewrite the routines
3512  * in such a way as to broadcast only PF.exprbufsize WORDs. (TU 15 Sep 2011)
3513  */
3514  if ( curfile->POstop - curfile->POfill < PF.exprbufsize + 1 ) {
3515  if(PF_pushScratch(curfile))
3516  return(-1);
3517  }
3518  if ( PF_Bcast(curfile->POfill, (PF.exprbufsize + 1) * sizeof(WORD)) )
3519  return(-1);
3520  l = PF_WalkThroughExprSlave(curfile, &counter, l - 1);
3521  }while(l>0);
3522  if(l<0){/*The tail is extra, decrease POfill*/
3523  if(l<-PF.exprbufsize)/*error due to a PF_pushScratch() failure */
3524  return(-1);
3525  curfile->POfill-=l;
3526  }
3527  if ( curfile->handle >= 0 ) {
3528  if ( PF_pushScratch(curfile) ) return -1;
3529  }
3530  curfile->POfull=curfile->POfill;
3531  if ( curfile != AR.hidefile ) AR.InInBuf = curfile->POfull-curfile->PObuffer;
3532  else AR.InHiBuf = curfile->POfull-curfile->PObuffer;
3533  CHECK(counter == e->counter + 1); /* The first term is the prototype. */
3534  return(0);
3535 }
3536 
3537 /*
3538  #] PF_rhsBCastSlave :
3539  #[ PF_BroadcastExpr :
3540 */
3541 
3550 {
3551  if ( PF.me == MASTER ) {
3552  if ( PF_rhsBCastMaster(file, e) ) return -1;
3553 #ifdef PF_DEBUG_BCAST_RHSEXPR
3554  MesPrint(">> Broadcast RhsExpr: %s", AC.exprnames->namebuffer + e->name);
3555 #endif
3556  }
3557  else {
3558  POSITION pos;
3559  SetEndHScratch(file, &pos);
3560  e->onfile = pos;
3561  if ( PF_rhsBCastSlave(file, e) ) return -1;
3562  }
3563  return 0;
3564 }
3565 
3566 /*
3567  #] PF_BroadcastExpr :
3568  #[ PF_BroadcastRHS :
3569 */
3570 
3578 {
3579  int i;
3580  for ( i = 0; i < NumExpressions; i++ ) {
3581  EXPRESSIONS e = &Expressions[i];
3582  if ( !(e->vflags & ISINRHS) ) continue;
3583  switch ( e->status ) {
3584  case LOCALEXPRESSION:
3585  case SKIPLEXPRESSION:
3586  case DROPLEXPRESSION:
3587  case GLOBALEXPRESSION:
3588  case SKIPGEXPRESSION:
3589  case DROPGEXPRESSION:
3590  case HIDELEXPRESSION:
3591  case HIDEGEXPRESSION:
3592  case INTOHIDELEXPRESSION:
3593  case INTOHIDEGEXPRESSION:
3594  if ( PF_BroadcastExpr(e, AR.infile) ) return -1;
3595  break;
3596  case HIDDENLEXPRESSION:
3597  case HIDDENGEXPRESSION:
3598  case DROPHLEXPRESSION:
3599  case DROPHGEXPRESSION:
3600  case UNHIDELEXPRESSION:
3601  case UNHIDEGEXPRESSION:
3602  if ( PF_BroadcastExpr(e, AR.hidefile) ) return -1;
3603  break;
3604  }
3605  }
3606  if ( PF.me != MASTER )
3607  UpdatePositions();
3608  return 0;
3609 }
3610 
3611 /*
3612  #] PF_BroadcastRHS :
3613  #] Broadcasting RHS expressions :
3614  #[ InParallel mode :
3615  #[ PF_InParallelProcessor :
3616 */
3617 
3625 {
3626  GETIDENTITY
3627  int i, next,tag;
3628  EXPRESSIONS e;
3629  if(PF.me == MASTER){
3630  if ( PF.numtasks >= 3 ) {
3631  partodoexr = (WORD*)Malloc1(sizeof(WORD)*(PF.numtasks+1),"PF_InParallelProcessor");
3632  for ( i = 0; i < NumExpressions; i++ ) {
3633  e = Expressions+i;
3634  if ( e->partodo <= 0 ) continue;
3635  if ( e->counter == 0 ) { /* Expression with zero terms */
3636  e->partodo = 0;
3637  continue;
3638  }
3639  switch(e->status){
3640  case LOCALEXPRESSION:
3641  case GLOBALEXPRESSION:
3642  case UNHIDELEXPRESSION:
3643  case UNHIDEGEXPRESSION:
3644  case INTOHIDELEXPRESSION:
3645  case INTOHIDEGEXPRESSION:
3646  tag=PF_ANY_SOURCE;
3647  next=PF_Wait4SlaveIP(&tag);
3648  if(next<0)
3649  return(-1);
3650  if(tag == PF_DATA_MSGTAG){
3651  PF_Statistics(PF_stats,0);
3652  if(PF_Slave2MasterIP(next))
3653  return(-1);
3654  }
3655  if(PF_Master2SlaveIP(next,e))
3656  return(-1);
3657  partodoexr[next]=i;
3658  break;
3659  default:
3660  e->partodo = 0;
3661  continue;
3662  }/*switch(e->status)*/
3663  }/*for ( i = 0; i < NumExpressions; i++ )*/
3664  /*Here some slaves are working, other are waiting on PF_Send.
3665  Wait all of them.*/
3666  /*At this point no new slaves may be launched so PF_WaitAllSlaves()
3667  does not modify partodoexr[].*/
3668  if(PF_WaitAllSlaves())
3669  return(-1);
3670 
3671  if ( AC.CollectFun ) AR.DeferFlag = 0;
3672  if(partodoexr){
3673  M_free(partodoexr,"PF_InParallelProcessor");
3674  partodoexr=NULL;
3675  }/*if(partodoexr)*/
3676  }/*if ( PF.numtasks >= 3 ) */
3677  else {
3678  for ( i = 0; i < NumExpressions; i++ ) {
3679  Expressions[i].partodo = 0;
3680  }
3681  }
3682  return(0);
3683  }/*if(PF.me == MASTER)*/
3684  /*Slave:*/
3685  if(PF_Wait4MasterIP(PF_EMPTY_MSGTAG))
3686  return(-1);
3687  /*master is ready to listen to me*/
3688  do{
3689  WORD *oldwork= AT.WorkPointer;
3690  tag=PF_ReadMaster();/*reads directly to its scratch!*/
3691  if(tag<0)
3692  return(-1);
3693  if(tag == PF_DATA_MSGTAG){
3694  oldwork = AT.WorkPointer;
3695 
3696  /* For redefine statements. */
3697  if ( AC.numpfirstnum > 0 ) {
3698  int j;
3699  for ( j = 0; j < AC.numpfirstnum; j++ ) {
3700  AC.inputnumbers[j] = -1;
3701  }
3702  }
3703 
3704  if(PF_DoOneExpr())/*the processor*/
3705  return(-1);
3706  if(PF_Wait4MasterIP(PF_DATA_MSGTAG))
3707  return(-1);
3708  if(PF_Slave2MasterIP(PF.me))/*both master and slave*/
3709  return(-1);
3710  AT.WorkPointer=oldwork;
3711  }/*if(tag == PF_DATA_MSGTAG)*/
3712  }while(tag!=PF_EMPTY_MSGTAG);
3713  PF.exprtodo=-1;
3714  return(0);
3715 }/*PF_InParallelProcessor*/
3716 
3717 /*
3718  #] PF_InParallelProcessor :
3719  #[ PF_Wait4MasterIP :
3720 */
3721 
3722 static int PF_Wait4MasterIP(int tag)
3723 {
3724  int follow = 0;
3725  LONG cpu,space = 0;
3726 
3727  if(PF.log){
3728  fprintf(stderr,"[%d] Starting to send to Master\n",PF.me);
3729  fflush(stderr);
3730  }
3731 
3732  PF_PreparePack();
3733  cpu = TimeCPU(1);
3734  PF_Pack(&cpu ,1,PF_LONG);
3735  PF_Pack(&space ,1,PF_LONG);
3736  PF_Pack(&PF_linterms ,1,PF_LONG);
3737  PF_Pack(&(AM.S0->GenTerms) ,1,PF_LONG);
3738  PF_Pack(&(AM.S0->TermsLeft),1,PF_LONG);
3739  PF_Pack(&follow ,1,PF_INT );
3740 
3741  if(PF.log){
3742  fprintf(stderr,"[%d] Now sending with tag = %d\n",PF.me,tag);
3743  fflush(stderr);
3744  }
3745 
3746  PF_Send(MASTER, tag);
3747 
3748  if(PF.log){
3749  fprintf(stderr,"[%d] returning from send\n",PF.me);
3750  fflush(stderr);
3751  }
3752  return(0);
3753 }
3754 /*
3755  #] PF_Wait4MasterIP :
3756  #[ PF_DoOneExpr :
3757 */
3758 
3766 static int PF_DoOneExpr(void)/*the processor*/
3767 {
3768  GETIDENTITY
3769  EXPRESSIONS e;
3770  int i;
3771  WORD *term;
3772  POSITION position, outposition;
3773  FILEHANDLE *fi, *fout;
3774  LONG dd = 0;
3775  WORD oldBracketOn = AR.BracketOn;
3776  WORD *oldBrackBuf = AT.BrackBuf;
3777  WORD oldbracketindexflag = AT.bracketindexflag;
3778  e = Expressions + PF.exprtodo;
3779  i = PF.exprtodo;
3780  AR.CurExpr = i;
3781  AR.SortType = AC.SortType;
3782  AR.expchanged = 0;
3783  if ( ( e->vflags & ISFACTORIZED ) != 0 ) {
3784  AR.BracketOn = 1;
3785  AT.BrackBuf = AM.BracketFactors;
3786  AT.bracketindexflag = 1;
3787  }
3788 
3789  position = AS.OldOnFile[i];
3790  if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION ) {
3791  AR.GetFile = 2; fi = AR.hidefile;
3792  }
3793  else {
3794  AR.GetFile = 0; fi = AR.infile;
3795  }
3796 /*
3797  PUTZERO(fi->POposition);
3798  if ( fi->handle >= 0 ) {
3799  fi->POfill = fi->POfull = fi->PObuffer;
3800  }
3801 */
3802  SetScratch(fi,&position);
3803  term = AT.WorkPointer;
3804  AR.CompressPointer = AR.CompressBuffer;
3805  AR.CompressPointer[0] = 0;
3806  AR.KeptInHold = 0;
3807  if ( GetTerm(BHEAD term) <= 0 ) {
3808  MesPrint("Expression %d has problems in scratchfile",i);
3809  Terminate(-1);
3810  }
3811  if ( AT.bracketindexflag > 0 ) OpenBracketIndex(i);
3812  term[3] = i;
3813  PUTZERO(outposition);
3814  fout = AR.outfile;
3815  fout->POfill = fout->POfull = fout->PObuffer;
3816  fout->POposition = outposition;
3817  if ( fout->handle >= 0 ) {
3818  fout->POposition = outposition;
3819  }
3820 /*
3821  The next statement is needed because we need the system
3822  to believe that the expression is at position zero for
3823  the moment. In this worker, with no memory of other expressions,
3824  it is. This is needed for when a bracket index is made
3825  because there e->onfile is an offset. Afterwards, when the
3826  expression is written to its final location in the masters
3827  output e->onfile will get its real value.
3828 */
3829  PUTZERO(e->onfile);
3830  if ( PutOut(BHEAD term,&outposition,fout,0) < 0 ) return -1;
3831 
3832  AR.DeferFlag = AC.ComDefer;
3833 
3834 /* AR.sLevel = AB[0]->R.sLevel;*/
3835  term = AT.WorkPointer;
3836  NewSort(BHEAD0);
3837  AR.MaxDum = AM.IndDum;
3838  AN.ninterms = 0;
3839  while ( GetTerm(BHEAD term) ) {
3840  SeekScratch(fi,&position);
3841  AN.ninterms++; dd = AN.deferskipped;
3842  if ( ( e->vflags & ISFACTORIZED ) != 0 && term[1] == HAAKJE ) {
3843  StoreTerm(BHEAD term);
3844  }
3845  else {
3846  if ( AC.CollectFun && *term <= (AM.MaxTer/(2*(LONG)sizeof(WORD))) ) {
3847  if ( GetMoreTerms(term) < 0 ) {
3848  LowerSortLevel(); return(-1);
3849  }
3850  SeekScratch(fi,&position);
3851  }
3852  AT.WorkPointer = term + *term;
3853  AN.RepPoint = AT.RepCount + 1;
3854  if ( AR.DeferFlag ) {
3855  AR.CurDum = AN.IndDum = Expressions[PF.exprtodo].numdummies;
3856  }
3857  else {
3858  AN.IndDum = AM.IndDum;
3859  AR.CurDum = ReNumber(BHEAD term);
3860  }
3861  if ( AC.SymChangeFlag ) MarkDirty(term,DIRTYSYMFLAG);
3862  if ( AN.ncmod ) {
3863  if ( ( AC.modmode & ALSOFUNARGS ) != 0 ) MarkDirty(term,DIRTYFLAG);
3864  else if ( AR.PolyFun ) PolyFunDirty(BHEAD term);
3865  }
3866  if ( ( AR.PolyFunType == 2 ) && ( AC.PolyRatFunChanged == 0 )
3867  && ( e->status == LOCALEXPRESSION || e->status == GLOBALEXPRESSION ) ) {
3868  PolyFunClean(BHEAD term);
3869  }
3870  if ( Generator(BHEAD term,0) ) {
3871  LowerSortLevel(); return(-1);
3872  }
3873  AN.ninterms += dd;
3874  }
3875  SetScratch(fi,&position);
3876  if ( fi == AR.hidefile ) {
3877  AR.InHiBuf = (fi->POfull-fi->PObuffer)
3878  -DIFBASE(position,fi->POposition)/sizeof(WORD);
3879  }
3880  else {
3881  AR.InInBuf = (fi->POfull-fi->PObuffer)
3882  -DIFBASE(position,fi->POposition)/sizeof(WORD);
3883  }
3884  }
3885  AN.ninterms += dd;
3886  if ( EndSort(BHEAD AM.S0->sBuffer,0) < 0 ) return(-1);
3887  e->numdummies = AR.MaxDum - AM.IndDum;
3888  AR.BracketOn = oldBracketOn;
3889  AT.BrackBuf = oldBrackBuf;
3890  if ( ( e->vflags & TOBEFACTORED ) != 0 )
3891  poly_factorize_expression(e);
3892  else if ( ( ( e->vflags & TOBEUNFACTORED ) != 0 )
3893  && ( ( e->vflags & ISFACTORIZED ) != 0 ) )
3894  poly_unfactorize_expression(e);
3895  if ( AM.S0->TermsLeft ) e->vflags &= ~ISZERO;
3896  else e->vflags |= ISZERO;
3897  if ( AR.expchanged == 0 ) e->vflags |= ISUNMODIFIED;
3898 /* if ( AM.S0->TermsLeft ) AR.expflags |= ISZERO;
3899  if ( AR.expchanged ) AR.expflags |= ISUNMODIFIED;*/
3900  AR.GetFile = 0;
3901  AT.bracketindexflag = oldbracketindexflag;
3902 
3903  fout->POfull = fout->POfill;
3904  return(0);
3905 }
3906 
3907 /*
3908  #] PF_DoOneExpr :
3909  #[ PF_Slave2MasterIP :
3910 */
3911 
3912 typedef struct bufIPstruct {
3913  LONG i;
3914  struct ExPrEsSiOn e;
3915 } bufIPstruct_t;
3916 
3917 static int PF_Slave2MasterIP(int src)/*both master and slave*/
3918 {
3919  EXPRESSIONS e;
3920  bufIPstruct_t exprData;
3921  int i,l;
3922  FILEHANDLE *fout=AR.outfile;
3923  POSITION pos;
3924  /*Here we know the length of data to send in advance:
3925  slave has the only one expression in its scratch file, and it sends
3926  this information to the master.*/
3927  if(PF.me != MASTER){/*slave*/
3928  e = Expressions + PF.exprtodo;
3929  /*Fill in the expression data:*/
3930  memcpy(&(exprData.e), e, sizeof(struct ExPrEsSiOn));
3931  SeekScratch(fout,&pos);
3932  exprData.i=BASEPOSITION(pos);
3933  /*Send the metadata:*/
3934  if(PF_RawSend(MASTER,&exprData,sizeof(bufIPstruct_t),0))
3935  return(-1);
3936  i=exprData.i;
3937  SETBASEPOSITION(pos,0);
3938  do{
3939  int blen=PF.exprbufsize*sizeof(WORD);
3940  if(i<blen)
3941  blen=i;
3942  l=PF_SendChunkIP(fout,&pos, MASTER, blen);
3943  /*Here always l == blen!*/
3944  if(l<0)
3945  return(-1);
3946  ADDPOS(pos,l);
3947  i-=l;
3948  }while(i>0);
3949  if ( fout->handle >= 0 ) { /* Now get rid of the file */
3950  CloseFile(fout->handle);
3951  fout->handle = -1;
3952  remove(fout->name);
3953  PUTZERO(fout->POposition);
3954  PUTZERO(fout->filesize);
3955  fout->POfill = fout->POfull = fout->PObuffer;
3956  }
3957  /* Now handle redefined preprocessor variables. */
3958  if ( AC.numpfirstnum > 0 ) {
3960  PF_PackRedefinedPreVars();
3961  PF_LongSingleSend(MASTER, PF_MISC_MSGTAG);
3962  }
3963  return(0);
3964  }/*if(PF.me != MASTER)*/
3965  /*Master*/
3966  /*partodoexr[src] is the number of expression.*/
3967  e = Expressions +partodoexr[src];
3968  /*Get metadata:*/
3969  if (PF_RawRecv(&src, &exprData,sizeof(bufIPstruct_t),&i)!= sizeof(bufIPstruct_t))
3970  return(-1);
3971  /*Fill in the expression data:*/
3972 /* memcpy(e, &(exprData.e), sizeof(struct ExPrEsSiOn)); */
3973  e->counter = exprData.e.counter;
3974  e->vflags = exprData.e.vflags;
3975  e->numdummies = exprData.e.numdummies;
3976  e->numfactors = exprData.e.numfactors;
3977  if ( !(e->vflags & ISZERO) ) AR.expflags |= ISZERO;
3978  if ( !(e->vflags & ISUNMODIFIED) ) AR.expflags |= ISUNMODIFIED;
3979  SeekScratch(fout,&pos);
3980  e->onfile = pos;
3981  i=exprData.i;
3982  while(i>0){
3983  int blen=PF.exprbufsize*sizeof(WORD);
3984  if(i<blen)
3985  blen=i;
3986  l=PF_RecvChunkIP(fout,src,blen);
3987  /*Here always l == blen!*/
3988  if(l<0)
3989  return(-1);
3990  i-=l;
3991  }
3992  /* Now handle redefined preprocessor variables. */
3993  if ( AC.numpfirstnum > 0 ) {
3994  PF_LongSingleReceive(src, PF_MISC_MSGTAG, NULL, NULL);
3995  PF_UnpackRedefinedPreVars();
3996  }
3997  return(0);
3998 }
3999 
4000 /*
4001  #] PF_Slave2MasterIP :
4002  #[ PF_Master2SlaveIP :
4003 */
4004 
4005 static int PF_Master2SlaveIP(int dest, EXPRESSIONS e)
4006 {
4007  bufIPstruct_t exprData;
4008  FILEHANDLE *fi;
4009  POSITION pos;
4010  int l;
4011  LONG ll=0,count=0;
4012  WORD *t;
4013  if(e==NULL){/*Say to the slave that no more job:*/
4014  if(PF_RawSend(dest,&exprData,sizeof(bufIPstruct_t),PF_EMPTY_MSGTAG))
4015  return(-1);
4016  return(0);
4017  }
4018  memcpy(&(exprData.e), e, sizeof(struct ExPrEsSiOn));
4019  exprData.i=e-Expressions;
4020  if ( AC.StatsFlag && AC.OldParallelStats ) {
4021  MesPrint("");
4022  MesPrint(" Sending expression %s to slave %d",EXPRNAME(exprData.i),dest);
4023  }
4024  if(PF_RawSend(dest,&exprData,sizeof(bufIPstruct_t),PF_DATA_MSGTAG))
4025  return(-1);
4026  if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION )
4027  fi = AR.hidefile;
4028  else
4029  fi = AR.infile;
4030  pos=e->onfile;
4031  SetScratch(fi,&pos);
4032  do{
4033  l=PF_SendChunkIP(fi, &pos, dest, PF.exprbufsize*sizeof(WORD));
4034  if(l<0)
4035  return(-1);
4036  t=fi->PObuffer+ (DIFBASE(pos,fi->POposition))/sizeof(WORD);
4037  ll=PF_WalkThrough(t,ll,l/sizeof(WORD),&count);
4038  ADDPOS(pos,l);
4039  }while(ll>-2);
4040  return(0);
4041 }
4042 
4043 /*
4044  #] PF_Master2SlaveIP :
4045  #[ PF_ReadMaster :
4046 */
4047 
4048 static int PF_ReadMaster(void)/*reads directly to its scratch!*/
4049 {
4050  bufIPstruct_t exprData;
4051  int tag,m=MASTER;
4052  EXPRESSIONS e;
4053  FILEHANDLE *fi;
4054  POSITION pos;
4055  LONG count=0;
4056  WORD *t;
4057  LONG ll=0;
4058  int l;
4059  /*Get metadata:*/
4060  if (PF_RawRecv(&m, &exprData,sizeof(bufIPstruct_t),&tag)!= sizeof(bufIPstruct_t))
4061  return(-1);
4062 
4063  if(tag == PF_EMPTY_MSGTAG)/*No data, no job*/
4064  return(tag);
4065 
4066  /*data expected, tag must be == PF_DATA_MSTAG!*/
4067  PF.exprtodo=exprData.i;
4068  e=Expressions + PF.exprtodo;
4069  /*Fill in the expression data:*/
4070 /* memcpy(e, &(exprData.e), sizeof(struct ExPrEsSiOn)); */
4071  if ( e->status == HIDDENLEXPRESSION || e->status == HIDDENGEXPRESSION )
4072  fi = AR.hidefile;
4073  else
4074  fi = AR.infile;
4075  SetEndHScratch(fi,&pos);
4076  e->onfile=AS.OldOnFile[PF.exprtodo]=pos;
4077 
4078  do{
4079  l=PF_RecvChunkIP(fi,MASTER,PF.exprbufsize*sizeof(WORD));
4080  if(l<0)
4081  return(-1);
4082  t=fi->POfull-l/sizeof(WORD);
4083  ll=PF_WalkThrough(t,ll,l/sizeof(WORD),&count);
4084  }while(ll>-2);
4085  /*Now -ll-2 is the number of "extra" elements transferred from the master.*/
4086  fi->POfull-=-ll-2;
4087  fi->POfill=fi->POfull;
4088  return(PF_DATA_MSGTAG);
4089 }
4090 
4091 /*
4092  #] PF_ReadMaster :
4093  #[ PF_SendChunkIP :
4094  thesize is in bytes. Returns the number of sent bytes or <0 on error:
4095 */
4096 
4097 static int PF_SendChunkIP(FILEHANDLE *curfile, POSITION *position, int to, LONG thesize)
4098 {
4099  LONG l=thesize;
4100  if(
4101  ISLESSPOS(*position,curfile->POposition) ||
4102  ISGEPOSINC(*position,curfile->POposition,
4103  ((curfile->POfull-curfile->PObuffer)*sizeof(WORD)-thesize) )
4104  ){
4105  if(curfile->handle< 0)
4106  l=(curfile->POfull-curfile->PObuffer)*sizeof(WORD) - (LONG)(position->p1);
4107  else{
4108  PF_SetScratch(curfile,position);
4109  if(
4110  ISGEPOSINC(*position,curfile->POposition,
4111  ((curfile->POfull-curfile->PObuffer)*sizeof(WORD)-thesize) )
4112  )
4113  l=(curfile->POfull-curfile->PObuffer)*sizeof(WORD) - (LONG)position->p1;
4114  }
4115  }
4116  /*Now we are able to sent l bytes from the
4117  curfile->PObuffer[position-curfile->POposition]*/
4118  if(PF_RawSend(to,curfile->PObuffer+ (DIFBASE(*position,curfile->POposition))/sizeof(WORD),l,0))
4119  return(-1);
4120  return(l);
4121 }
4122 
4123 /*
4124  #] PF_SendChunkIP :
4125  #[ PF_RecvChunkIP :
4126  thesize is in bytes. Returns the number of received bytes or <0 on error:
4127 */
4128 
4129 static int PF_RecvChunkIP(FILEHANDLE *curfile, int from, LONG thesize)
4130 {
4131  LONG receivedBytes;
4132 
4133  if( (LONG)((curfile->POstop - curfile->POfull)*sizeof(WORD)) < thesize )
4134  if(PF_pushScratch(curfile))
4135  return(-1);
4136  /*Now there is enough space from curfile->POfill to curfile->POstop*/
4137  {/*Block:*/
4138  int tag=0;
4139  receivedBytes=PF_RawRecv(&from,curfile->POfull,thesize,&tag);
4140  }/*:Block*/
4141  if(receivedBytes >= 0 ){
4142  curfile->POfull+=receivedBytes/sizeof(WORD);
4143  curfile->POfill=curfile->POfull;
4144  }/*if(receivedBytes >= 0 )*/
4145  return(receivedBytes);
4146 }
4147 
4148 /*
4149  #] PF_RecvChunkIP :
4150  #[ PF_WalkThrough :
4151  Returns:
4152  >= 0 -- initial offset,
4153  -1 -- the first element of t contains the length of the tail of compressed term,
4154  <= -2 -- -(d+2), where d is the number of extra transferred elements.
4155  Expects:
4156  l -- initial offset or -1,
4157  chunk -- number of transferred elements (not bytes!)
4158  *count -- incremented each time a new term is found
4159 */
4160 
4161 static int PF_WalkThrough(WORD *t, LONG l, LONG chunk, LONG *count)
4162 {
4163  if(l<0) /*==-1!*/
4164  l=(*t)+1;/*the first element of t contains the length of
4165  the tail of compressed term*/
4166  else{
4167  if(l>=chunk)/*next term is out of the chunk*/
4168  return(l-chunk);
4169  t+=l;
4170  chunk-=l;/*note, l was less than chunk so chunk >0!*/
4171  l=*t;
4172  }
4173  /*Main loop:*/
4174  while(l!=0){
4175  if(l>0){/*an offset to the next term*/
4176  if(l<chunk){
4177  t+=l;
4178  chunk-=l;/*note, l was less than chunk so chunk >0!*/
4179  l=*t;
4180  (*count)++;
4181  }/*if(l<chunk)*/
4182  else
4183  return(l-chunk);
4184  }/*if(l>0)*/
4185  else{ /* l<0 */
4186  if(chunk < 2)/*i.e., chunk == 1*/
4187  return(-1);/*the first WORD in the next chunk is length of the tail of the compressed term*/
4188  l=*(t+1)+2;/*+2 since
4189  1. t points to the length field -1,
4190  2. the size of a tail of compressed term is equal to the number of WORDs in this tail*/
4191  }
4192  }/*while(l!=0)*/
4193  return(-1-chunk);/* -(2+(chunk-1)), chunk>0 ! */
4194 }
4195 
4196 /*
4197  #] PF_WalkThrough :
4198  #] InParallel mode :
4199  #[ PF_SendFile :
4200 */
4201 
4202 #define PF_SNDFILEBUFSIZE 4096
4203 
4211 int PF_SendFile(int to, FILE *fd)
4212 {
4213  size_t len=0;
4214  if(fd == NULL){
4215  if(PF_RawSend(to,&to,sizeof(int),PF_EMPTY_MSGTAG))
4216  return(-1);
4217  return(0);
4218  }
4219  for(;;){
4220  char buf[PF_SNDFILEBUFSIZE];
4221  size_t l;
4222  l=fread(buf, 1, PF_SNDFILEBUFSIZE, fd);
4223  len+=l;
4224  if(l==PF_SNDFILEBUFSIZE){
4225  if(PF_RawSend(to,buf,PF_SNDFILEBUFSIZE,PF_BUFFER_MSGTAG))
4226  return(-1);
4227  }
4228  else{
4229  if(PF_RawSend(to,buf,l,PF_ENDBUFFER_MSGTAG))
4230  return(-1);
4231  break;
4232  }
4233  }/*for(;;)*/
4234  return(len);
4235 }
4236 
4237 /*
4238  #] PF_SendFile :
4239  #[ PF_RecvFile :
4240 */
4241 
4249 int PF_RecvFile(int from, FILE *fd)
4250 {
4251  size_t len=0;
4252  int tag;
4253  do{
4254  char buf[PF_SNDFILEBUFSIZE];
4255  int l;
4256  l=PF_RawRecv(&from,buf,PF_SNDFILEBUFSIZE,&tag);
4257  if(l<0)
4258  return(-1);
4259  if(tag == PF_EMPTY_MSGTAG)
4260  return(-1);
4261 
4262  if( fwrite(buf,l,1,fd)!=1 )
4263  return(-1);
4264  len+=l;
4265  }while(tag!=PF_ENDBUFFER_MSGTAG);
4266  return(len);
4267 }
4268 
4269 /*
4270  #] PF_RecvFile :
4271  #[ Synchronised output :
4272  #[ Explanations :
4273 */
4274 
4275 /*
4276  * If the master and slaves output statistics or error messages to the same stream
4277  * or file (e.g., the standard output or the log file) simultaneously, then
4278  * a mixing of their outputs can occur. To avoid this, TFORM uses a lock of
4279  * ErrorMessageLock, but there is no locking functionality in the original MPI
4280  * specification. We need to synchronise the output from the master and slaves.
4281  *
4282  * The idea of the synchronised output (by, e.g., MesPrint()) implemented here is
4283  * Slaves:
4284  * 1. Save the output by WriteFile() (set to PF_WriteFileToFile())
4285  * into some buffers between MLOCK(ErrorMessageLock) and
4286  * MUNLOCK(ErrorMessageLock), which call PF_MLock() and PF_MUnlock(),
4287  * respectively. The output for AM.StdOut and AC.LogHandle are saved to
4288  * the buffers.
4289  * 2. At MUNLOCK(ErrorMessageLock), send the output in the buffer to the master,
4290  * with PF_STDOUT_MSGTAG or PF_LOG_MSGTAG.
4291  * Master:
4292  * 1. Receive the buffered output from slaves, and write them by
4293  * WriteFileToFile().
4294  * The main problem is how and where the master receives messages from
4295  * the slaves (PF_ReceiveErrorMessage()). For this purpose there are three
4296  * helper functions: PF_CatchErrorMessages() and PF_CatchErrorMessagesForAll()
4297  * which remove messages with PF_STDOUT_MSGTAG or PF_LOG_MSGTAG from the top
4298  * of the message queue, and PF_ProbeWithCatchingErrorMessages() which is the same as
4299  * PF_Probe() except that it removes these messages.
4300  */
4301 
4302 /*
4303  #] Explanations :
4304  #[ Variables :
4305 */
4306 
/* (slaves) The lock count. See PF_MLock() and PF_MUnlock(). */
static int errorMessageLock = 0;
/* (slaves) The buffer for AM.StdOut. */
static Vector(UBYTE, stdoutBuffer);
/* (slaves) The buffer for AC.LogHandle. */
static Vector(UBYTE, logBuffer);
/* (master) The buffer for receiving messages; it shares logBuffer. */
#define recvBuffer logBuffer

/*
 * If PF_ENABLE_STDOUT_BUFFERING is defined, the master performs the line
 * buffering (using stdoutBuffer) at PF_WriteFileToFile(). It is switched
 * on by default when UNIX is defined.
 */
#if !defined(PF_ENABLE_STDOUT_BUFFERING) && defined(UNIX)
#define PF_ENABLE_STDOUT_BUFFERING
#endif
4321 
4322 /*
4323  #] Variables :
4324  #[ PF_MLock :
4325 */
4326 
4330 void PF_MLock(void)
4331 {
4332  /* Only on slaves. */
4333  if ( errorMessageLock++ > 0 ) return;
4334  VectorClear(stdoutBuffer);
4335  VectorClear(logBuffer);
4336 }
4337 
4338 /*
4339  #] PF_MLock :
4340  #[ PF_MUnlock :
4341 */
4342 
4346 void PF_MUnlock(void)
4347 {
4348  /* Only on slaves. */
4349  if ( --errorMessageLock > 0 ) return;
4350  if ( !VectorEmpty(stdoutBuffer) ) {
4351  PF_RawSend(MASTER, VectorPtr(stdoutBuffer), VectorSize(stdoutBuffer), PF_STDOUT_MSGTAG);
4352  }
4353  if ( !VectorEmpty(logBuffer) ) {
4354  PF_RawSend(MASTER, VectorPtr(logBuffer), VectorSize(logBuffer), PF_LOG_MSGTAG);
4355  }
4356 }
4357 
4358 /*
4359  #] PF_MUnlock :
4360  #[ PF_WriteFileToFile :
4361 */
4362 
4375 LONG PF_WriteFileToFile(int handle, UBYTE *buffer, LONG size)
4376 {
4377  if ( PF.me != MASTER && errorMessageLock > 0 ) {
4378  if ( handle == AM.StdOut ) {
4379  VectorPushBacks(stdoutBuffer, buffer, size);
4380  return size;
4381  }
4382  else if ( handle == AC.LogHandle ) {
4383  VectorPushBacks(logBuffer, buffer, size);
4384  return size;
4385  }
4386  }
4387 #ifdef PF_ENABLE_STDOUT_BUFFERING
4388  /*
4389  * On my computer, sometimes a single linefeed "\n" sent to the standard
4390  * output is ignored on the execution of mpiexec. A typical example is:
4391  * $ cat foo.c
4392  * #include <unistd.h>
4393  * int main() {
4394  * write(1, " ", 4);
4395  * write(1, "\n", 1);
4396  * write(1, " ", 4);
4397  * write(1, "123\n", 4);
4398  * return 0;
4399  * }
4400  * or even as a shell script:
4401  * $ cat foo.sh
4402  * #! bin/sh
4403  * printf " "
4404  * printf "\n"
4405  * printf " "
4406  * printf "123\n"
4407  * When I ran it on mpiexec
4408  * $ while :; do mpiexec -np 1 ./foo.sh; done
4409  * I observed the single linefeed (printf "\n") was sometimes ignored. Even
4410  * though this phenomenon might be specific to my environment, I added this
4411  * code because someone may encounter a similar phenomenon and feel it
4412  * frustrating. (TU 16 Jun 2011)
4413  *
4414  * Phenomenon:
4415  * A single linefeed sent to the standard output occasionally ignored
4416  * on mpiexec.
4417  *
4418  * Environment:
4419  * openSUSE 11.4 (x86_64)
4420  * kernel: 2.6.37.6-0.5-desktop
4421  * gcc: 4.5.1 20101208
4422  * mpich2-1.3.2p1 configured with '--enable-shared --with-pm=smpd'
4423  *
4424  * Solution:
4425  * In Unix (in which Uwrite() calls write() system call without any buffering),
4426  * we perform the line buffering here. A single linefeed is also buffered.
4427  *
4428  * XXX:
4429  * At the end of the program the buffered output (text without LF) will not be flushed,
4430  * i.e., will not be written to the standard output. This is not problematic at a normal run.
4431  * The buffer can be explicitly flushed by PF_FlushStdOutBuffer().
4432  */
4433  if ( PF.me == MASTER && handle == AM.StdOut ) {
4434  size_t oldsize;
4435  /* Assume the newline character is LF (when UNIX is defined). */
4436  if ( (size > 0 && buffer[size - 1] != LINEFEED) || (size == 1 && buffer[0] == LINEFEED) ) {
4437  VectorPushBacks(stdoutBuffer, buffer, size);
4438  return size;
4439  }
4440  if ( (oldsize = VectorSize(stdoutBuffer)) > 0 ) {
4441  LONG ret;
4442  VectorPushBacks(stdoutBuffer, buffer, size);
4443  ret = WriteFileToFile(handle, VectorPtr(stdoutBuffer), VectorSize(stdoutBuffer));
4444  VectorClear(stdoutBuffer);
4445  if ( ret < 0 ) {
4446  return ret;
4447  }
4448  else if ( ret < (LONG)oldsize ) {
4449  return 0; /* This means the buffered output in previous calls is lost. */
4450  }
4451  else {
4452  return ret - (LONG)oldsize;
4453  }
4454  }
4455  }
4456 #endif
4457  return WriteFileToFile(handle, buffer, size);
4458 }
4459 
4460 /*
4461  #] PF_WriteFileToFile :
4462  #[ PF_FlushStdOutBuffer :
4463 */
4464 
/*
 * Master: writes the text still held in stdoutBuffer to AM.StdOut and
 * empties the buffer. Without PF_ENABLE_STDOUT_BUFFERING this routine
 * does nothing.
 *
 * NOTE(review): the function header was absent from this listing; it is
 * restored from the cross-reference entry "void PF_FlushStdOutBuffer(void)".
 */
void PF_FlushStdOutBuffer(void)
{
#ifdef PF_ENABLE_STDOUT_BUFFERING
	if ( PF.me == MASTER && VectorSize(stdoutBuffer) > 0 ) {
		WriteFileToFile(AM.StdOut, VectorPtr(stdoutBuffer), VectorSize(stdoutBuffer));
		VectorClear(stdoutBuffer);
	}
#endif
}
4478 
4479 /*
4480  #] PF_FlushStdOutBuffer :
4481  #[ PF_ReceiveErrorMessage :
4482 */
4483 
4492 static void PF_ReceiveErrorMessage(int src, int tag)
4493 {
4494  /* Only on the master. */
4495  int size;
4496  int ret = PF_RawProbe(&src, &tag, &size);
4497  CHECK(ret == 0);
4498  switch ( tag ) {
4499  case PF_STDOUT_MSGTAG:
4500  case PF_LOG_MSGTAG:
4501  VectorReserve(recvBuffer, size);
4502  ret = PF_RawRecv(&src, VectorPtr(recvBuffer), size, &tag);
4503  CHECK(ret == size);
4504  if ( size > 0 ) {
4505  int handle = (tag == PF_STDOUT_MSGTAG) ? AM.StdOut : AC.LogHandle;
4506 #ifdef PF_ENABLE_STDOUT_BUFFERING
4507  if ( handle == AM.StdOut ) PF_WriteFileToFile(handle, VectorPtr(recvBuffer), size);
4508  else
4509 #endif
4510  WriteFileToFile(handle, VectorPtr(recvBuffer), size);
4511  }
4512  break;
4513  }
4514 }
4515 
4516 /*
4517  #] PF_ReceiveErrorMessage :
4518  #[ PF_CatchErrorMessages :
4519 */
4520 
4529 static void PF_CatchErrorMessages(int *src, int *tag)
4530 {
4531  /* Only on the master. */
4532  for (;;) {
4533  int asrc = *src;
4534  int atag = *tag;
4535  int ret = PF_RawProbe(&asrc, &atag, NULL);
4536  CHECK(ret == 0);
4537  if ( atag == PF_STDOUT_MSGTAG || atag == PF_LOG_MSGTAG ) {
4538  PF_ReceiveErrorMessage(asrc, atag);
4539  continue;
4540  }
4541  *src = asrc;
4542  *tag = atag;
4543  break;
4544  }
4545 }
4546 
4547 /*
4548  #] PF_CatchErrorMessages :
4549  #[ PF_CatchErrorMessagesForAll :
4550 */
4551 
4556 static void PF_CatchErrorMessagesForAll(void)
4557 {
4558  /* Only on the master. */
4559  int i;
4560  for ( i = 1; i < PF.numtasks; i++ ) {
4561  int src = i;
4562  int tag = PF_ANY_MSGTAG;
4563  PF_CatchErrorMessages(&src, &tag);
4564  }
4565 }
4566 
4567 /*
4568  #] PF_CatchErrorMessagesForAll :
4569  #[ PF_ProbeWithCatchingErrorMessages :
4570 */
4571 
4581 static int PF_ProbeWithCatchingErrorMessages(int *src)
4582 {
4583  for (;;) {
4584  int newsrc = *src;
4585  int tag = PF_Probe(&newsrc);
4586  if ( tag == PF_STDOUT_MSGTAG || tag == PF_LOG_MSGTAG ) {
4587  PF_ReceiveErrorMessage(newsrc, tag);
4588  continue;
4589  }
4590  if ( tag > 0 ) *src = newsrc;
4591  return tag;
4592  }
4593 }
4594 
4595 /*
4596  #] PF_ProbeWithCatchingErrorMessages :
4597  #[ PF_FreeErrorMessageBuffers :
4598 */
4599 
4606 {
4607  VectorFree(stdoutBuffer);
4608  VectorFree(logBuffer);
4609 }
4610 
4611 /*
4612  #] PF_FreeErrorMessageBuffers :
4613  #] Synchronised output :
4614 */
int NormalModulus(UWORD *, WORD *)
Definition: reken.c:1370
LONG * NumTerms
Definition: structs.h:915
VOID AddArgs(PHEAD WORD *, WORD *, WORD *)
Definition: sort.c:2115
int PF_Init(int *argc, char ***argv)
Definition: parallel.c:1945
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:1503
#define VectorInit(X)
Definition: vector.h:113
#define VectorStruct(T)
Definition: vector.h:65
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
Definition: pre.c:549
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:642
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
Definition: mpi.c:1583
int PF_BroadcastCBuf(int bufnum)
Definition: parallel.c:3146
#define VectorReserve(X, newcapacity)
Definition: vector.h:249
#define PACK_LONG(p, n)
Definition: parallel.c:135
#define Vector(T, X)
Definition: vector.h:84
void PF_BroadcastBuffer(WORD **buffer, LONG *length)
Definition: parallel.c:2123
LONG PF_RealTime(int)
Definition: mpi.c:101
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:671
#define VectorPushBacks(X, src, n)
Definition: vector.h:295
int PF_IRecvRbuf(PF_BUFFER *, int, int)
Definition: mpi.c:366
Definition: structs.h:618
LONG PF_RawRecv(int *src, void *buf, LONG thesize, int *tag)
Definition: mpi.c:484
VOID WriteStats(POSITION *, WORD)
Definition: sort.c:91
void PF_MLock(void)
Definition: parallel.c:4330
Definition: parallel.c:265
int PF_BroadcastString(UBYTE *str)
Definition: parallel.c:2165
int PF_BroadcastExpFlags(void)
Definition: parallel.c:3257
int PF_PackString(const UBYTE *str)
Definition: mpi.c:706
WORD PF_Deferred(WORD *term, WORD level)
Definition: parallel.c:1202
int PF_LongMultiBroadcast(void)
Definition: mpi.c:1807
int PF_EndSort(void)
Definition: parallel.c:864
#define VectorFree(X)
Definition: vector.h:130
int PF_LibTerminate(int)
Definition: mpi.c:209
int PF_InParallelProcessor(void)
Definition: parallel.c:3624
void PF_FreeErrorMessageBuffers(void)
Definition: parallel.c:4605
LONG PF_GetSlaveTimes(void)
Definition: parallel.c:2076
WORD ** lhs
Definition: structs.h:912
int PF_BroadcastExpr(EXPRESSIONS e, FILEHANDLE *file)
Definition: parallel.c:3549
#define VectorEmpty(X)
Definition: vector.h:222
#define SWAP(x, y)
Definition: parallel.c:124
int PF_RecvWbuf(WORD *, LONG *, int *)
Definition: mpi.c:337
Definition: structs.h:908
WORD InsertTerm(PHEAD WORD *, WORD, WORD, WORD *, WORD *, WORD)
Definition: proces.c:2350
#define VectorClear(X)
Definition: vector.h:235
Definition: structs.h:281
WORD * Pointer
Definition: structs.h:911
int PF_SendFile(int to, FILE *fd)
Definition: parallel.c:4211
int PF_BroadcastRedefinedPreVars(void)
Definition: parallel.c:3004
WORD StoreTerm(PHEAD WORD *)
Definition: sort.c:4070
WORD * dimension
Definition: structs.h:917
int PF_ISendSbuf(int to, int tag)
Definition: mpi.c:261
int PF_PrepareLongMultiPack(void)
Definition: mpi.c:1643
#define UNPACK_LONG(p, n)
Definition: parallel.c:144
int PF_LongSingleSend(int to, int tag)
Definition: mpi.c:1540
LONG PF_BroadcastNumber(LONG x)
Definition: parallel.c:2096
int PF_RawProbe(int *src, int *tag, int *bytesize)
Definition: mpi.c:508
WORD ** rhs
Definition: structs.h:913
int PF_Probe(int *)
Definition: mpi.c:230
int PF_LibInit(int *, char ***)
Definition: mpi.c:123
int PF_Broadcast(void)
Definition: mpi.c:883
int PF_Bcast(void *buffer, int count)
Definition: mpi.c:440
int PF_PreparePack(void)
Definition: mpi.c:624
Definition: structs.h:1028
WORD * numdum
Definition: structs.h:916
COMPTREE * boomlijst
Definition: structs.h:918
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:1469
VOID LowerSortLevel()
Definition: sort.c:4435
int PF_CollectModifiedDollars(void)
Definition: parallel.c:2508
int PF_Terminate(int errorcode)
Definition: parallel.c:2060
int PF_BroadcastPreDollar(WORD **dbuffer, LONG *newsize, int *numterms)
Definition: parallel.c:2220
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
Definition: sort.c:1300
WORD * Buffer
Definition: structs.h:909
int PF_RawSend(int dest, void *buf, LONG l, int tag)
Definition: mpi.c:463
LONG BufferSize
Definition: structs.h:919
struct NoDe NODE
int PF_Send(int to, int tag)
Definition: mpi.c:822
WORD NewSort(PHEAD0)
Definition: sort.c:553
WORD Generator(PHEAD WORD *, WORD)
Definition: proces.c:2865
WORD * Top
Definition: structs.h:910
LONG PF_WriteFileToFile(int handle, UBYTE *buffer, LONG size)
Definition: parallel.c:4375
#define CHECK(condition)
Definition: parallel.c:153
#define VectorSize(X)
Definition: vector.h:194
WORD FlushOut(POSITION *, FILEHANDLE *, int)
Definition: sort.c:1621
void PF_FlushStdOutBuffer(void)
Definition: parallel.c:4469
int PF_PrepareLongSinglePack(void)
Definition: mpi.c:1451
int PF_UnpackString(UBYTE *str)
Definition: mpi.c:774
int PF_Processor(EXPRESSIONS e, WORD i, WORD LastExpression)
Definition: parallel.c:1534
WORD CompCoef(WORD *, WORD *)
Definition: reken.c:3012
int PF_BroadcastModifiedDollars(void)
Definition: parallel.c:2787
int PF_RecvFile(int from, FILE *fd)
Definition: parallel.c:4249
int PF_Receive(int src, int tag, int *psrc, int *ptag)
Definition: mpi.c:848
void PF_MUnlock(void)
Definition: parallel.c:4346
#define VectorPtr(X)
Definition: vector.h:150
int handle
Definition: structs.h:646
LONG EndSort(PHEAD WORD *, int)
Definition: sort.c:632
LONG * CanCommu
Definition: structs.h:914
int PF_WaitRbuf(PF_BUFFER *, int, LONG *)
Definition: mpi.c:400
int PF_BroadcastRHS(void)
Definition: parallel.c:3577
#define VectorPushBack(X, x)
Definition: vector.h:277