
/* Linux NewReno/SACK/FACK/ECN state machine.
 * --------------------------------------
 *
 * "Open"       Normal state, no dubious events, fast path.
 * "Disorder"   In all the respects it is "Open",
 *              but requires a bit more attention. It is entered when
 *              we see some SACKs or dupacks. It is split of "Open"
 *              mainly to move some processing from fast path to slow one.
 * "CWR"        CWND was reduced due to some Congestion Notification event.
 *              It can be ECN, ICMP source quench, local device congestion.
 * "Recovery"   CWND was reduced, we are fast-retransmitting.
 * "Loss"       CWND was reduced due to RTO timeout or SACK reneging.
 *
 * tcp_fastretrans_alert() is entered:
 * - each incoming ACK, if state is not "Open"
 * - when arrived ACK is unusual, namely:
 *      * SACK
 *      * Duplicate ACK.
 *      * ECN ECE.
 */

A seguito di un RTO timeout viene richiamata la tcp_enter_loss().
Vediamola nel dettaglio.

/* Enter Loss state. If "how" is not zero, forget all SACK information
 * and reset tags completely, otherwise preserve SACKs. If receiver
 * dropped its ofo queue, we will know this due to reneging detection.
 */

void tcp_enter_loss(struct sock *sk, int how)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct sk_buff *skb;
        int cnt = 0;

        /* Reduce ssthresh if it has not yet been made inside this window. */

	/* If we are in the Open or Disorder state... */

        if (tp->ca_state <= TCP_CA_Disorder ||

	/* ...or if snd_una equals high_seq...
	 * Quoted directly from RFC2582 on NewReno:
	 * This modification uses a new variable "send_high", whose initial value is
         * the initial send sequence number.  After each retransmit timeout, the
	 * highest sequence numbers transmitted so far is recorded in the
         * variable "send_high".
	 * If, after a retransmit timeout, the TCP data sender retransmits three
   	 * consecutive packets that have already been received by the data
   	 * receiver, then the TCP data sender will receive three duplicate
   	 * acknowledgements that do not acknowledge "send_high".  In this case,
   	 * the duplicate acknowledgements are not an indication of a new
   	 * instance of congestion.  They are simply an indication that the
   	 * sender has unnecessarily retransmitted at least three packets.
	 */

            tp->snd_una == tp->high_seq ||
	
	 /* ...or if the state is Loss and the number of unrecovered RTOs
	  * (tp->retransmits) is 0...
	  */

            (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
                /* ...then remember the current ssthresh in prior_ssthresh
                 * and recompute snd_ssthresh.
                 */
                tp->prior_ssthresh = tcp_current_ssthresh(tp);
                tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
        }

	/* Slow start begins again from here, cwnd having been set to 1. */

        tp->snd_cwnd = 1;
        tp->snd_cwnd_cnt = 0;
        tp->snd_cwnd_stamp = tcp_time_stamp;

	/* Everything related to possible retransmissions is cleared. */

        tcp_clear_retrans(tp);

        /* Push undo marker, if it was plain RTO and nothing
         * was retransmitted. */
        if (!how)
                tp->undo_marker = tp->snd_una;

        /* Walk the retransmit queue; cnt is the 1-based position of the
         * current skb in that walk.
         */
        for_retrans_queue(skb, sk, tp) {
                cnt++;
                /* A segment already tagged as retransmitted cancels the
                 * undo marker set above.
                 */
                if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
                        tp->undo_marker = 0;
                /* Drop every tag bit except SACKED_ACKED. */
                TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
                if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
                        /* Not SACKed, or SACK information is being forgotten
                         * (how != 0): tag the segment as lost.
                         */
		        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                        tp->lost_out++;
                } else {
                        /* SACK tag preserved; fackets_out ends up holding the
                         * queue position of the last such segment.
                         */
                        tp->sacked_out++;
                        tp->fackets_out = cnt;
                }
        }
        tcp_sync_left_out(tp);

        /* Never keep a reordering estimate above the sysctl value. */
        tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering);
        tp->ca_state = TCP_CA_Loss;
        /* Record the highest sequence sent so far for this loss episode. */
        tp->high_seq = tp->snd_nxt;
        TCP_ECN_queue_cwr(tp);
}


	/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
	 * The exception is rate halving phase, when cwnd is decreasing towards
	 * ssthresh.
	 */
	static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
	{
		/* Outside the CWR and Recovery states the result is
		 * max(ssthresh, 0.75 * snd_cwnd), with 0.75 * cwnd computed
		 * as cwnd/2 + cwnd/4.
		 */
		if (!((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))) {
			return max(tp->snd_ssthresh,
				   ((tp->snd_cwnd >> 1) +
				    (tp->snd_cwnd >> 2)));
		}

		/* In CWR (ECN) or Recovery (fast recovery) ssthresh is
		 * returned untouched.
		 */
		return tp->snd_ssthresh;
	}


	/* Recalculate snd_ssthresh, we want to set it to:
	 *
	 * 	one half the current congestion window, but no
	 *	less than two segments
	 */
	static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
	{
		/* Half of the current congestion window... */
		if ((tp->snd_cwnd >> 1U) > 2U)
			return tp->snd_cwnd >> 1U;

		/* ...but never fewer than two segments. */
		return 2U;
	}


Partiamo dalle procedure di undo recovery e in particolare focalizziamo
la nostra attenzione sul recovery di uno stato TCP_CA_Loss in cui la
state machine entra nel caso di un RTO timeout o di un SACK reneging.


static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
{
        /* tp->prior_ssthresh is the ssthresh saved when recovery started.
         * It is 0 when the reduction was triggered by ECN (the entry code in
         * tcp_fastretrans_alert() leaves it at 0 when FLAG_ECE is set).
         */
        if (!tp->prior_ssthresh) {
                /* No saved ssthresh: raise cwnd to at least ssthresh,
                 * without the doubling used in the other branch.
                 */
                tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
        } else {
                /* Saved ssthresh available: raise cwnd to at least
                 * twice the current ssthresh.
                 */
                tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);

                /* On a real undo (undo != 0), restore the ssthresh that was
                 * in effect when the congestion episode began, if it was
                 * larger than the current one, and withdraw the pending CWR.
                 */
                if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
                        tp->snd_ssthresh = tp->prior_ssthresh;
                        TCP_ECN_withdraw_cwr(tp);
                }
        }

        /* cwnd may be trimmed again here: a window that suddenly allows
         * many segments to be sent during recovery could cause a burst.
         */
        tcp_moderate_cwnd(tp);
        tp->snd_cwnd_stamp = tcp_time_stamp;
}

/* CWND moderation, preventing bursts due to too big ACKs
 * in dubious situations.
 */
static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp)
{

	/* Cap cwnd at the number of packets in flight plus the burst
	 * allowance from tcp_max_burst(), then refresh the cwnd timestamp.
	 *
	 * NOTE(review): the original annotation states that tcp_max_burst()
	 * always returns 3 and attributes to Alan Cox the remark
	 * "Slow start with delack produces 3 packets of burst, so that
	 * it is safe "de facto"." The helper is not shown here - confirm.
	 */

	tp->snd_cwnd = min(tp->snd_cwnd,
			   tcp_packets_in_flight(tp)+tcp_max_burst(tp));
	tp->snd_cwnd_stamp = tcp_time_stamp;
}

NOTA
Un commento è essenziale per comprendere il senso del codice che segue. Nel momento in cui si entra in 
Recovery (uscendo dallo stato Open) nella tcp_fastretrans_alert() si legge il seguente pezzo di codice:

[..]
			tp->high_seq = tp->snd_nxt;
                        tp->prior_ssthresh = 0;
                        tp->undo_marker = tp->snd_una;
                        tp->undo_retrans = tp->retrans_out;

                        if (tp->ca_state < TCP_CA_CWR) {
                                if (!(flag&FLAG_ECE))
                                        tp->prior_ssthresh = tcp_current_ssthresh(tp);
                                tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
                                TCP_ECN_queue_cwr(tp);
                        }

                        tp->snd_cwnd_cnt = 0;
			tp->ca_state = TCP_CA_Recovery;
                }

[..]

Quindi, come si vede, all'inizio della recovery undo_marker viene inizializzato al valore di snd_una,
mentre undo_retrans viene inizializzato con il numero di segmenti ritrasmessi in rete. Si noti anche
il ruolo di prior_ssthresh, esplicato dal seguente codice.

	/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
	 * The exception is rate halving phase, when cwnd is decreasing towards
	 * ssthresh.
	 */
	static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
	{
		/* Anywhere except CWR and Recovery: the larger of ssthresh
		 * and three quarters of cwnd (cwnd/2 + cwnd/4).
		 */
		if (!((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))) {
			return max(tp->snd_ssthresh,
				   ((tp->snd_cwnd >> 1) +
				    (tp->snd_cwnd >> 2)));
		}

		/* CWR or Recovery: ssthresh is reported as it stands. */
		return tp->snd_ssthresh;
	}


	/* Recalculate snd_ssthresh, we want to set it to:
	 *
	 * 	one half the current congestion window, but no
	 *	less than two segments
	 */
	static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
	{
		/* Two segments is the floor; above it, use half of cwnd. */
		if ((tp->snd_cwnd >> 1U) > 2U)
			return tp->snd_cwnd >> 1U;

		return 2U;
	}




static inline int tcp_may_undo(struct tcp_opt *tp)
{
        /* Without an undo marker there is nothing to undo. */
        if (!tp->undo_marker)
                return 0;

        /* undo_retrans has dropped to zero: undo is allowed. */
        if (!tp->undo_retrans)
                return 1;

        /* Otherwise undo is allowed only if the packet was merely delayed. */
        return tcp_packet_delayed(tp) != 0;
}


/* Nothing was retransmitted or returned timestamp is less
 * than timestamp of the first retransmission.
 */

static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
{
	/* No retransmission timestamp recorded: nothing was retransmitted. */
	if (!tp->retrans_stamp)
		return 1;

	/* A usable echoed timestamp is required for the comparison below. */
	if (!tp->saw_tstamp || !tp->rcv_tsecr)
		return 0;

	/* Signed 32-bit difference: true when the echoed timestamp is older
	 * than the timestamp of the first retransmission.
	 */
	return (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0;
}


/* People celebrate: "We love our President!" */
static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp)
{
        if (tcp_may_undo(tp)) {
                /* Either nothing was retransmitted or the original
                 * transmission got through: revert the cwnd reduction.
                 */
                DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
                tcp_undo_cwr(tp, 1);
                if (tp->ca_state != TCP_CA_Loss) {
                        NET_INC_STATS_BH(TCPFullUndo);
                } else {
                        NET_INC_STATS_BH(TCPLossUndo);
                }
                tp->undo_marker = 0;
        }

        /* Leave the current state unless this is Reno with snd_una still
         * sitting exactly at high_seq.
         */
        if (tp->snd_una != tp->high_seq || !IsReno(tp)) {
                tp->ca_state = TCP_CA_Open;
                return 0;
        }

        /* Reno: keep the old state until something *above* high_seq is
         * ACKed, which prevents false fast retransmits (RFC2582).
         * SACK TCP does not need this.
         */
        tcp_moderate_cwnd(tp);
        return 1;
}

/* Congestion-avoidance states of the sender state machine. */
enum tcp_ca_state {
	TCP_CA_Open	= 0,	/* normal state, fast path */
	TCP_CA_Disorder	= 1,	/* like Open, entered on SACKs or dupacks */
	TCP_CA_CWR	= 2,	/* cwnd reduced after a congestion notification */
	TCP_CA_Recovery	= 3,	/* cwnd reduced, fast-retransmitting */
	TCP_CA_Loss	= 4	/* cwnd reduced after RTO timeout or SACK reneging */
};

/* One flag per state: the bit number equals the enum value, so a state can
 * be tested against a set with (1<<state) & (TCPF_CA_x | TCPF_CA_y).
 */
#define TCPF_CA_Open		(1<<TCP_CA_Open)
#define TCPF_CA_Disorder	(1<<TCP_CA_Disorder)
#define TCPF_CA_CWR		(1<<TCP_CA_CWR)
#define TCPF_CA_Recovery	(1<<TCP_CA_Recovery)
#define TCPF_CA_Loss		(1<<TCP_CA_Loss)


/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
{
        /* Undo only when a marker is set and undo_retrans has reached zero. */
        if (!tp->undo_marker || tp->undo_retrans)
                return;

        DBGUNDO(sk, tp, "D-SACK");
        tcp_undo_cwr(tp, 1);
        tp->undo_marker = 0;
        NET_INC_STATS_BH(TCPDSACKUndo);
}


/* Undo during fast recovery after partial ACK. */

static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
{
        /* A partial ACK arrived. By default Hoe's retransmit is forced for
         * Reno, or when fackets_out exceeds the reordering estimate.
         */
        int force_retransmit = IsReno(tp) || tp->fackets_out>tp->reordering;

        if (!tcp_may_undo(tp))
                return force_retransmit;

        /* The hole was filled by a delayed packet rather than by a
         * retransmit.
         */
        if (tp->retrans_out == 0)
                tp->retrans_stamp = 0;

        tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);

        DBGUNDO(sk, tp, "Hoe");
        tcp_undo_cwr(tp, 0);
        NET_INC_STATS_BH(TCPPartialUndo);

        /* Do not make Hoe's retransmit yet: if the first packet was
         * delayed, the remaining ones most probably were as well.
         */
        return 0;
}

/* Undo during loss recovery after partial ACK. */
static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
{
        struct sk_buff *skb;

        if (!tcp_may_undo(tp))
                return 0;

        /* Remove the LOST tag from every skb in the retransmit queue. */
        for_retrans_queue(skb, sk, tp) {
                TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
        }
        DBGUNDO(sk, tp, "partial loss");

        /* With nothing marked lost, left_out is just the SACKed count. */
        tp->lost_out = 0;
        tp->left_out = tp->sacked_out;

        tcp_undo_cwr(tp, 1);
        NET_INC_STATS_BH(TCPLossUndo);
        tp->retransmits = 0;
        tp->undo_marker = 0;

        /* Only non-Reno connections return to Open immediately. */
        if (!IsReno(tp))
                tp->ca_state = TCP_CA_Open;
        return 1;
}

static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
{
        /* Bring cwnd down to ssthresh if it is still above it. */
        if (tp->snd_cwnd > tp->snd_ssthresh)
                tp->snd_cwnd = tp->snd_ssthresh;

        tp->snd_cwnd_stamp = tcp_time_stamp;
}

static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
{
        int target;

        tp->left_out = tp->sacked_out;

        /* No retransmissions outstanding: forget the retransmit timestamp. */
        if (!tp->retrans_out)
                tp->retrans_stamp = 0;

        /* An ECN echo on this ACK moves the connection into CWR. */
        if (flag&FLAG_ECE)
                tcp_enter_cwr(tp);

        /* While in CWR only keep shrinking cwnd. */
        if (tp->ca_state == TCP_CA_CWR) {
                tcp_cwnd_down(tp);
                return;
        }

        /* Anything still outstanding or undoable keeps us in Disorder;
         * otherwise go back to Open.
         */
        target = (tp->left_out || tp->retrans_out || tp->undo_marker)
                        ? TCP_CA_Disorder : TCP_CA_Open;

        /* On an actual state change, restart high_seq from snd_nxt. */
        if (tp->ca_state != target) {
                tp->ca_state = target;
                tp->high_seq = tp->snd_nxt;
        }
        tcp_moderate_cwnd(tp);
}

