--- a/backup.py Sun Jan 21 22:46:05 2018 +0000 +++ b/backup.py Sun Jan 21 23:52:35 2018 +0000 @@ -13,6 +13,17 @@ logger=borgend.logger.getChild(__name__) +# State +INACTIVE=0 +SCHEDULED=1 +ACTIVE=2 +BUSY=3 +OFFLINE=4 +ERRORS=5 + +def combine_state(state1, state2): + return max(state1, state2) + loglevel_translation={ 'CRITICAL': logging.CRITICAL, 'ERROR': logging.ERROR, @@ -111,7 +122,6 @@ self.config=config self.lastrun_when=None - self.lastrun_errors=None self.borg_instance=None self.current_operation=None self.thread_log=None @@ -120,6 +130,7 @@ self.scheduled_operation=None self.lock=Lock() self.__status_update_callback=None + self.state=INACTIVE def is_running(self): with self.lock: @@ -185,8 +196,14 @@ logger.log(lvl, status['name'] + ': ' + status['message']) if lvl>=logging.WARNING: errors_this_message=status + state=ERRORS + if ('msgid' in status and + (status['msgid']=='LockTimeout' or # observed in reality + status['msgid']=='LockErrorT' or # in docs + status['msgid']=='LockErrorT')): # in docs + state=BUSY with self.lock: - self.current_operation['errors']=True + self.state=combine_state(self.state, state) status, callback=self.__status_unlocked() else: logger.debug('Unrecognised log entry %s' % str(status)) @@ -210,30 +227,39 @@ res=self.borg_instance.read_result() - errors=False + # Finish processing remaining errors + self.thread_log.join() + + with self.lock: + state=self.state + + # If there were no errors, reset back to INACTIVE state + if state==ACTIVE: + state=INACTIVE logger.debug('Borg result: %s' % str(res)) - if res is None: - errors=True + if res is None and state==INACTIVE: + logger.error('No result from borg despite no error in log') + state=ERRORS logger.debug('Waiting for borg subprocess to terminate in result thread') - errors=errors or not self.borg_instance.wait() + if not self.borg_instance.wait(): + logger.critical('Borg subprocess did not terminate') + state=combine_state(state, ERRORS) - logger.debug('Borg subprocess terminated (errors: %s); terminating result listener thread' % str(errors)) - - self.thread_log.join() + logger.debug('Borg subprocess terminated (end state %s); terminating result listener thread' % str(state)) with self.lock: if self.current_operation['operation']=='create': self.lastrun_when=self.current_operation['when_monotonic'] - self.lastrun_errors=errors self.thread_res=None self.thread_log=None self.borg_instance=None self.current_operation=None self.__schedule_unlocked() + self.state=state status, callback=self.__status_unlocked() if callback: callback(self, status) @@ -258,6 +284,7 @@ self.queue=queue self.current_operation=op self.current_operation['when_monotonic']=time.monotonic() + self.state=ACTIVE t_log.start() t_res.start() @@ -294,7 +321,7 @@ except Exception as err: logger.debug('Rescheduling after failure') self.lastrun_when=time.monotonic() - self.lastrun_errors=True + self.state=ERRORS self.__schedule_unlocked() raise err @@ -369,7 +396,7 @@ return {'operation': 'create', 'detail': 'initial', 'when_monotonic': now+initial_interval} - elif self.lastrun_errors: + elif self.state>=BUSY: if self.retry_interval==0: return None else: @@ -398,6 +425,7 @@ tmr=Timer(delay, self.__queue_timed_operation) self.scheduled_operation=op self.timer=tmr + self.state=combine_state(self.state, SCHEDULED) tmr.start() return op @@ -425,17 +453,12 @@ else: status={'type': 'nothing'} - if self.lastrun_errors is not None: - status['errors']=self.lastrun_errors + status['name']=self.name + status['state']=self.state if 'detail' not in status: status['detail']='NONE' - if 'errors' not in status: - status['errors']=False - - status['name']=self.name - if 'when_monotonic' in status: status['when']=(status['when_monotonic'] -time.monotonic()+time.time())