rink.nu / projects / ananas / kernel/arch/i386/interrupts.S@7c4f300504ea (annotated)
kernel/arch/i386/interrupts.S
author Rink Springer <rink@rink.nu>
Sat Apr 28 11:37:38 2012 +0200 (24 months ago)
changeset 1342 7c4f300504ea
parent 1334 1374b6733ab0
child 1343 a41259ab7771
permissions -rw-r--r--
i386: Fix a bug that may result in a cloned thread returning to the wrong address

It turns out that a system call will always use a clean kernel stack upon entry; this is the case because it is not possible to perform a system call from any other context. This implies that we only have to copy the first few bytes that were used to enter the kernel (syscall_int sets these up).

It is plainly wrong to rely on the parent's %esp as it will not point to the initial stack anymore if there was a pre-emption in between: the %esp will point to an inner-stackframe which is no longer valid by the time we return to our syscall code.

The result is that we could execute the pre-empted code twice; this would go unnoticed because we would just adjust the thread's stack again, but in the worst case it sometimes led to hard-to-understand problems since we shouldn't have gotten there with the current thread [1]

This change restructures the code so that we only restore the bare minimum kernel stack needed to return to the thread's caller; this is much more efficient and has the main benefit of not messing things up if the clone is pre-empted.

[1] This reasoning is deliberately vague; I'd need a crystal ball to figure out the exact course of action :-)
rink@0
     1
/*
rink@0
     2
 * Low-level assembly code to pass an interrupt to a higher-level handler.
rink@0
     3
 */
rink@0
     4
.text
rink@0
     5
.globl exception0, exception1, exception2, exception3, exception4, exception5
rink@233
     6
.globl exception6, exception7, exception8, exception9, exception10, exception11
rink@347
     7
.globl exception12, exception13, exception14, exception16, exception17
rink@347
     8
.globl exception18, exception19
rink@109
     9
.globl irq0, irq1, irq2, irq3, irq4, irq5, irq6, irq7, irq8, irq9
rink@234
    10
.globl irq10, irq11, irq12, irq13, irq14, irq15
rink@1334
    11
.globl syscall_int
rink@504
    12
.globl clone_return
rink@1299
    13
.globl userland_trampoline, kthread_trampoline
rink@79
    14
rink@369
    15
#include "options.h"
rink@259
    16
#include "machine/param.h"
rink@259
    17
#include "machine/vm.h"
rink@233
    18
#include "asmsyms.h"
rink@0
    19
rink@1017
    20
#define SANITY_CHECKS
rink@1017
    21
rink@233
    22
/*
 * SAVE_REGISTERS: store the eight general-purpose registers into the
 * stackframe that %esp currently points to. The frame must already have
 * been reserved by the caller. The value recorded in SF_ESP is %esp as it
 * is at the moment of the store, i.e. the base of the frame itself.
 * No register is modified.
 */
#define SAVE_REGISTERS \
	movl	%esi, SF_ESI(%esp); \
	movl	%edi, SF_EDI(%esp); \
	movl	%esp, SF_ESP(%esp); \
	movl	%ebp, SF_EBP(%esp); \
	movl	%edx, SF_EDX(%esp); \
	movl	%ecx, SF_ECX(%esp); \
	movl	%ebx, SF_EBX(%esp); \
	movl	%eax, SF_EAX(%esp);
rink@233
    31
rink@233
    32
/*
 * SAVE_SEGS: store %ds, %es, %fs and %gs into the stackframe at %esp.
 * Each selector is widened to 32 bits; %eax is zeroed once up front and
 * only its low word is rewritten afterwards, so the upper half of every
 * stored slot is zero. Clobbers %eax, so it must be used after the
 * general-purpose registers have been saved.
 */
#define SAVE_SEGS \
	xorl	%eax, %eax; \
	movw	%gs, %ax; \
	movl	%eax, SF_GS(%esp); \
	movw	%fs, %ax; \
	movl	%eax, SF_FS(%esp); \
	movw	%es, %ax; \
	movl	%eax, SF_ES(%esp); \
	movw	%ds, %ax; \
	movl	%eax, SF_DS(%esp);
rink@233
    42
rink@233
    43
/*
 * RESTORE_REGISTERS: reload the general-purpose registers from the
 * stackframe at %esp. %esp itself is deliberately NOT reloaded from
 * SF_ESP: the frame is still being addressed through it, and the callers
 * unwind the frame explicitly afterwards.
 */
#define RESTORE_REGISTERS \
	movl	SF_ESI(%esp), %esi; \
	movl	SF_EDI(%esp), %edi; \
	movl	SF_EBP(%esp), %ebp; \
	movl	SF_EDX(%esp), %edx; \
	movl	SF_ECX(%esp), %ecx; \
	movl	SF_EBX(%esp), %ebx; \
	movl	SF_EAX(%esp), %eax;
rink@233
    52
rink@233
    53
/*
 * RESTORE_SEGS: reload %ds, %es, %fs and %gs from the stackframe at %esp.
 * Uses %eax as scratch, so it has to run before RESTORE_REGISTERS puts
 * the saved %eax back.
 */
#define RESTORE_SEGS \
	movl	SF_GS(%esp), %eax; \
	movw	%ax, %gs; \
	movl	SF_FS(%esp), %eax; \
	movw	%ax, %fs; \
	movl	SF_ES(%esp), %eax; \
	movw	%ax, %es; \
	movl	SF_DS(%esp), %eax; \
	movw	%ax, %ds;
rink@0
    62
rink@234
    63
/*
 * do_irq: common tail of every IRQ stub. On entry the stub has already
 * reserved the stackframe (%esp points at its base) and filled in
 * SF_TRAPNO. The frame is completed here, the kernel segments are
 * loaded, and interrupt_handler() is called with a pointer to the frame
 * as its only argument.
 */
do_irq:
	/* Complete the stackframe; SAVE_SEGS uses %eax, so it goes second */
	SAVE_REGISTERS
	SAVE_SEGS

	/* Kernel data segment for both %es and %ds */
	movl	$GDT_SEL_KERNEL_DATA, %eax
	movw	%ax, %es
	movw	%ax, %ds

	/* Per-CPU data is reached through %fs */
	movl	$GDT_SEL_KERNEL_PCPU, %eax
	movw	%ax, %fs

	/* One more IRQ in flight on this CPU */
	incl	%fs:(PCPU_NESTEDIRQ)

	/*
	 * If the interrupted context was running with interrupts enabled
	 * (IF, bit 9, set in the saved eflags), enable them again here.
	 */
	movl	SF_EFLAGS(%esp), %eax
	testl	$0x200, %eax
	jz	1f

	sti

1:	/* Hand a pointer to the stackframe to the C handler */
	pushl	%esp
	call	interrupt_handler
	addl	$4, %esp

	/*
	 * Put the interrupted context back, drop everything in the frame
	 * below the saved %eip and return.
	 */
	RESTORE_SEGS
	RESTORE_REGISTERS
	addl	$SF_EIP, %esp
	iret
rink@109
    96
rink@233
    97
/*
 * do_exception: common tail of every exception stub. On entry the stub
 * has reserved the stackframe (%esp points at its base) and stored
 * SF_TRAPNO; SF_ERRNUM holds either the CPU-supplied error code or a
 * zero written by the stub. exception_handler() is called with a pointer
 * to the frame as its only argument.
 */
do_exception:
	/* Complete the stackframe; SAVE_SEGS uses %eax, so it goes second */
	SAVE_REGISTERS
	SAVE_SEGS

	/* Kernel data segment for both %es and %ds */
	movl	$GDT_SEL_KERNEL_DATA, %eax
	movw	%ax, %es
	movw	%ax, %ds

	/* Per-CPU data is reached through %fs */
	movl	$GDT_SEL_KERNEL_PCPU, %eax
	movw	%ax, %fs

	/* Hand a pointer to the stackframe to the C handler */
	pushl	%esp
	call	exception_handler
	addl	$4, %esp

	/*
	 * Put the faulting context back; advancing %esp by SF_EIP also
	 * steps over the error code slot, so iret finds %eip on top.
	 */
	RESTORE_SEGS
	RESTORE_REGISTERS
	addl	$SF_EIP, %esp
	iret
rink@233
   120
rink@0
   121
/*
rink@0
   122
 * There exist two versions of exceptions: those with an error code, and those
rink@0
   123
 * without one. Our handler code expects to be called with an error code, so
rink@0
   124
 * we add a fake zero one if needed for those exceptions that don't have one.
rink@0
   125
 */
rink@0
   126
/*
 * Stub for an exception where the CPU has already pushed an error code:
 * only the part of the frame below SF_ERRNUM has to be reserved, so the
 * pushed error code ends up in the SF_ERRNUM slot.
 */
#define EXCEPTION_HANDLER_ERRORCODE(num) \
exception##num: \
	subl	$SF_ERRNUM, %esp; \
	movl	$num, SF_TRAPNO(%esp); \
	jmp	do_exception
rink@0
   131
rink@0
   132
/*
 * Stub for an exception without a CPU-supplied error code: the frame is
 * reserved up to SF_EIP, which includes the SF_ERRNUM slot, and that slot
 * is filled with zero so do_exception always sees an error code.
 */
#define EXCEPTION_HANDLER(num) \
exception##num: \
	subl	$SF_EIP, %esp; \
	movl	$0, SF_ERRNUM(%esp); \
	movl	$num, SF_TRAPNO(%esp); \
	jmp	do_exception
rink@0
   138
rink@109
   139
/* IRQ handlers are a lot simpler: they come in a single easy flavour */
rink@109
   140
/*
 * Stub for IRQ 'num': reserve the stackframe below the CPU-pushed
 * %eip/%cs/eflags, record the IRQ number in SF_TRAPNO and continue in
 * the shared do_irq path.
 */
#define IRQ_HANDLER(num) \
irq##num: \
	subl	$SF_EIP, %esp; \
	movl	$num, SF_TRAPNO(%esp); \
	jmp	do_irq
rink@109
   145
rink@79
   146
/*
rink@943
   147
 * System call interrupt; system call number should be placed in %eax, all
rink@943
   148
 * arguments are expected to be placed on the stack.  Note that we only
rink@943
   149
 * preserve what's needed by the System V ABI because that's what everyone
rink@943
   150
 * seems to use anyway.
rink@96
   151
 */
rink@96
   152
syscall_int:
	/*
	 * System V ABI for Intel386 Architecture says we only have to
	 * preserve %ebx, %esi, %edi, %ebp and %esp (the latter is done
	 * implicitly by the hardware).
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* Save segment registers as well; we only use ds/es/fs */
	pushl	%ds
	pushl	%es
	pushl	%fs

	/*
	 * Kernel stack layout from here on (offsets relative to %esp):
	 *
	 *   fs es ds edi esi ebx ebp eip cs flg esp ss
	 *   0  4  8  12  16  20  24  28  32 36  40  44
	 */

#ifdef SANITY_CHECKS
	/*
	 * Sanity check: a system call must be made from userland, thus %ds
	 * must be the userland version. %ebx is free to use as scratch
	 * since it has been saved above.
	 */
	movw	%ds, %bx
	cmpw	$(GDT_SEL_USER_DATA + 3), %bx
	je	syscall_start_dsok

	int 	$3

syscall_start_dsok:
	/*
	 * Sanity check: interrupts must be enabled when entering, i.e. IF
	 * (bit 9) must be set in the eflags saved at offset 36.
	 */
	testl	$0x200, 36(%esp)
	jnz	syscall_start_ifok

	int	$3

syscall_start_ifok:
#endif

	/*
	 * Switch to kernel segment register context (ds/es/fs); this ensures
	 * the loads below will work (%esi uses %ds implicitly)
	 */
	movw	$GDT_SEL_KERNEL_DATA, %bx
	movw	%bx, %ds
	movw	%bx, %es
	movw	$GDT_SEL_KERNEL_PCPU, %bx
	movw	%bx, %fs

	/*
	 * Fetch the caller's stack pointer; it was saved on our current
	 * stack when the kernel was entered and sits at offset 40 in the
	 * layout shown above.
	 */
	movl 40(%esp), %esi

	/*
	 * Build the syscall structure on our stack: the syscall number
	 * (%eax) followed by five arguments taken from the caller's stack
	 * at offsets 4..20. Note that the stack grows backward, so we need
	 * to place them in reverse order.
	 */
	pushl	20(%esi)
	pushl	16(%esi)
	pushl	12(%esi)
	pushl	 8(%esi)
	pushl	 4(%esi)
	pushl	%eax

	/*
	 * Invoke the generic syscall handler; return values will be passed
	 * in %eax, so we cannot trash that. Note that our argument is pushed
	 * beforehand, which simply points to the syscall values.
	 */
	pushl	%esp
	call	syscall
	addl	$28, %esp	/* number + 5 arguments + pointer */

syscall_return:
	/*
	 * Shared exit path; clone_return jumps here as well. %esp must
	 * point at the saved %fs, and %eax holds the value to return.
	 */

	/* Restore segment registers */
	popl	%fs
	popl	%es
	popl	%ds

#ifdef SANITY_CHECKS
	/*
	 * Sanity check: we must have restored an userland ds; if we don't,
	 * iret will reset %ds to null because it can't be accessed in ring 3,
	 * leading to hard-to-debug problems. Best to verify it here. %ebx
	 * is scratch: its saved value is popped below.
	 */
	movw	%ds, %bx
	cmpw	$(GDT_SEL_USER_DATA + 3), %bx
	je	syscall_end_dsok

	int	$3

syscall_end_dsok:
	/*
	 * Sanity check: interrupts must be enabled when a syscall is done.
	 * With the three segment registers popped, the saved eflags are
	 * now at offset 24.
	 */
	testl	$0x200, 24(%esp)
	jnz	syscall_end_ifok

	int	$3

syscall_end_ifok:
#endif

	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	iret
rink@96
   268
rink@510
   269
clone_return:
	/*
	 * First code run by a freshly cloned thread. The clone has to
	 * come back from the system call just like its parent does.
	 *
	 * md_thread_clone() is expected to have placed a copy of the
	 * frame built by syscall_int on our stack and to have set our
	 * stack pointer as if we had just entered the system call
	 * ourselves; nothing else from the parent's stack is needed.
	 * That makes the generic syscall_return path usable as-is.
	 *
	 * The ABI returns values in %eax, so the clone's return value
	 * is loaded from the current thread's first argument slot
	 * before leaving. %ebx is used as scratch; syscall_return
	 * pops the saved %ebx afterwards.
	 */
	movl	%fs:(PCPU_CURTHREAD), %ebx
	movl	T_ARG1(%ebx), %eax

	/* Finish through the common system call exit path */
	jmp	syscall_return
rink@510
   293
rink@1274
   294
#ifdef OPTION_SMP
/*
 * Handler for spurious interrupts; only built for SMP configurations.
 * It returns immediately without touching any state.
 * NOTE(review): this label has no .globl in the visible part of the
 * file - confirm how it is referenced.
 */
spurious_irq:
	iret
#endif
rink@142
   298
rink@0
   299
/* Now we just need to list the exception handlers */
rink@0
   300
/*
 * One stub per exception vector. The _ERRORCODE flavour is used for the
 * vectors where the CPU pushes an error code itself. Vector 15 has no
 * stub.
 */
EXCEPTION_HANDLER(0)		/* divide error */
EXCEPTION_HANDLER(1)		/* debug */
EXCEPTION_HANDLER(2)		/* non-maskable interrupt */
EXCEPTION_HANDLER(3)		/* breakpoint */
EXCEPTION_HANDLER(4)		/* overflow */
EXCEPTION_HANDLER(5)		/* bound range exceeded */
EXCEPTION_HANDLER(6)		/* invalid opcode */
EXCEPTION_HANDLER(7)		/* device not available */
EXCEPTION_HANDLER_ERRORCODE(8)	/* double fault */
EXCEPTION_HANDLER(9)		/* coprocessor segment overrun */
EXCEPTION_HANDLER_ERRORCODE(10)	/* invalid TSS */
EXCEPTION_HANDLER_ERRORCODE(11)	/* segment not present */
EXCEPTION_HANDLER_ERRORCODE(12)	/* stack-segment fault */
EXCEPTION_HANDLER_ERRORCODE(13)	/* general protection */
EXCEPTION_HANDLER_ERRORCODE(14)	/* page fault */
EXCEPTION_HANDLER(16)		/* x87 floating-point error */
EXCEPTION_HANDLER_ERRORCODE(17)	/* alignment check */
EXCEPTION_HANDLER(18)		/* machine check */
EXCEPTION_HANDLER(19)		/* SIMD floating-point exception */
rink@109
   319
rink@109
   320
/* ...and the IRQ handlers */
rink@1293
   321
/* One stub per IRQ line, irq0 through irq15; all of them end in do_irq */
IRQ_HANDLER(0)
IRQ_HANDLER(1)
IRQ_HANDLER(2)
IRQ_HANDLER(3)
IRQ_HANDLER(4)
IRQ_HANDLER(5)
IRQ_HANDLER(6)
IRQ_HANDLER(7)

IRQ_HANDLER(8)
IRQ_HANDLER(9)
IRQ_HANDLER(10)
IRQ_HANDLER(11)
IRQ_HANDLER(12)
IRQ_HANDLER(13)
IRQ_HANDLER(14)
IRQ_HANDLER(15)
rink@1293
   337
rink@1295
   338
/*
 * kthread_trampoline: start a kernel thread. The entry point and its
 * argument are taken from the current thread's T_ARG1 and T_ARG2.
 */
kthread_trampoline:
	/* %ebx = current thread, %eax = entry point (arg1) */
	movl	%fs:(PCPU_CURTHREAD), %ebx
	movl	T_ARG1(%ebx), %eax

	/*
	 * The argument (arg2) goes on the stack first, below the iret
	 * frame built next.
	 * NOTE(review): assuming this runs at kernel privilege, iret only
	 * pops eip/cs/eflags, which leaves the argument at 0(%esp) where
	 * a C callee would look for its return address - confirm against
	 * the kthread entry convention.
	 */
	pushl	T_ARG2(%ebx)

	/*
	 * Enter the thread code through iret so that eflags is loaded
	 * with only IF (0x200) set, i.e. with interrupts enabled.
	 */
	pushl	$0x200			/* eflags */
	pushl	$GDT_SEL_KERNEL_CODE	/* cs */
	pushl	%eax			/* eip */
	iret
rink@1295
   349
rink@1293
   350
/*
 * userland_trampoline: enter userland for the first time. The entry
 * point and its argument are taken from the current thread's T_ARG1 and
 * T_ARG2; the argument is handed over in %esi.
 */
userland_trampoline:
	/* Fetch arguments; arg1 is the %eip and arg2 is the argument */
	movl	%fs:(PCPU_CURTHREAD), %ebx
	movl	T_ARG1(%ebx), %eax		/* eax = eip */
	movl	T_ARG2(%ebx), %esi		/* esi = arg */

	/*
	 * Set up %ds/%es to point to the userland segments. Segment
	 * registers are 16 bits wide, so the moves use the 'w' suffix
	 * with %dx, like every other segment load in this file.
	 */
	movl	$(GDT_SEL_USER_DATA + 3), %edx
	movw	%dx, %ds
	movw	%dx, %es

	/*
	 * Build the frame for an iret to ring 3: ss, esp, eflags, cs, eip.
	 * eflags has only IF (0x200) set, so userland starts with
	 * interrupts enabled.
	 */
	pushl	$(GDT_SEL_USER_DATA + 3)			/* ss */
	pushl	$(USERLAND_STACK_ADDR + THREAD_STACK_SIZE)	/* esp */
	pushl	$0x200					/* eflags */
	pushl	$(GDT_SEL_USER_CODE + 3)		/* cs */
	pushl	%eax					/* eip */
	iret
Powered by FreeBSD, PostgreSQL and Perl
© 2001 - 2011 Rink Springer