Find and Linear Interpolate Missing Data in Excel
I was hopping someone could give me a hand with linearly interpolating missing data in Excel.
I have data I gathered at different time intervals and in order to manipulate the data, I need it to line up in the rows. I am trying to make a function which finds missing data in a Column and fills it in based on a linear interpolation of the closest value above and the amount of time which has passed. A simple linear interpolation would be suitable to fill in the missing data. I have example data shown below (unfortunately wont let me post an image so comma delineated data is below). For missing data at the beginning or end, it would be OK to leave blank or copy the closest value.
Thank you for the help
Example Data (comma delineated):
Date(days),Time(hours),Data1,Data2,Data3,Data4,Data5,Data6
40684,0.385670139,,,2.0496开发者_运维问答20821,,,
40684,0.385675069,,,,,,0.133139679
40684,0.385680012,,,8.550069731,,,
40684,0.385684954,,,,,,
40684,0.385689884,,,6.308237045,,,
40684,0.385694826,,,,0.012712923,,0.008131037
40684,0.385699769,,,7.00871559,,,
40684,0.385704699,6.153512677,,,0.00487698,,
40684,0.385709641,,12.45628511,5.053182136,,,
40684,0.385714572,,,,,,
40684,0.385719514,,3.908056327,5.79615269,,0.132418943,
40684,0.385724456,,,,,,
40684,0.385729387,,,1.100001911,,0.011925063,
40684,0.385734329,,,,,,
40684,0.385739271,,,4.576754454,,0.114426916,
40684,0.385744201,,,,,,
40684,0.385749144,,,6.64728305,,0.078670107,
40684,0.385754074,,,,,,
40684,0.385759016,,,3.666219416,,,
40684,0.385763958,,,,0.206991693,,
40684,0.385768889,,,7.619782896,,,
40684,0.385773831,,,,,,
40684,0.385778773,,,2.622500183,,,
40684,0.385783704,,,,0.136512025,,
40684,0.385788646,,,4.506286862,,,
40684,0.385793576,,,,,,0.028082778
40684,0.385798519,,,0.28245389,,,
40684,0.385803461,,,,,,0.161361483
40684,0.385808391,,,8.368688368,,,
40684,0.385813333,,,,,,0.001826172
40684,0.385818264,,,2.513474192,,,
40684,0.385823206,,,,,,0.068932257
40684,0.385828148,,,3.908795401,,,
40684,0.385833079,,,,,,
40684,0.385838021,,,6.855867864,,,
40684,0.385842963,,,,,,
40684,0.385847894,,,11.49078845,,,
40684,0.385852836,,,,,,
40684,0.385857766,,,8.035507345,,0.022647571,
40684,0.385862708,9.617499888,,,0.113758152,,
40684,0.385867639,,23.14163433,10.70361681,,0.095718193,
40684,0.385872581,,,,0.073685334,,
40684,0.385877523,,13.93635422,11.24775181,,,
40684,0.385882454,,,,,,
What you need is to interpolate the data with either a linear interpolation, or a cubic spline. The you can align the x-axis.
Depending on the amount of data you will need VBA code to do the math for you. Unfortunately from the data you supplied it seems that there is a lot of noise (or variation) in the data (look at Data6) with big changes over short time that most schemes are going to be unstable. What you might need is to smooth the data first (if you have lots of it) and then interpolate it.
For linear interpolation have the sheet active and run the following
Option Explicit
Public Sub LinearFit()
Dim rx As Range, ry As Range
Dim x() As Variant, y() As Variant
Dim i As Integer, j As Integer, N As Integer, M As Integer
M = 6 'Columns
N = 44 'Rows
Set rx = Range("A2").Resize(N, 2) ' get x values (2 columns)
x = rx.Value 'Get Day/Time array from worksheet
Dim i_low, i_high As Integer
Dim x_low As Double, x_high As Double
Dim y_low As Double, y_high As Double
Dim x_temp As Double
For j = 1 To M 'go through all columns of data
Set ry = Range("C2").Offset(0, j - 1).Resize(N, 1)
y = ry.Value 'Get value array from worksheet
i_low = 1: i_high = 0 'initialize search values
Do 'loop starting empty values
i_high = i_high + 1
Loop While IsEmpty(y(i_high, 1)) And i_high < N
'get x for first non-empty cell
x_high = CDbl(x(i_high, 1)) + 24# * CDbl(x(i_high, 2))
If IsEmpty(y(i_high, 1)) Then
Exit For 'if column is empty leave it alone
Else
y_high = CDbl(y(i_high, 1)) ' get first non-zero value
End If
'Fill empty first cells with constant value
For i = 1 To i_high - 1
y(i, 1) = y_high 'fill empty cells with initial value
Next i
Do 'loop through sections of empty cells
i_low = i_high
x_low = x_high
y_low = y_high
'Find next empty block if it exists
Do
i_high = i_high + 1
Loop While IsEmpty(y(i_high, 1)) And i_high < N
'get x for next block end
x_high = CDbl(x(i_high, 1)) + 24# * CDbl(x(i_high, 2))
If IsEmpty(y(i_high, 1)) Then
y_high = y_low 'if cells empty to end use last value
Else
y_high = CDbl(y(i_high, 1)) 'find next value
End If
For i = i_low To i_high
' x value of interest
x_temp = CDbl(x(i, 1)) + 24# * CDbl(x(i, 2))
' Linear Interpolation (see wikipedia for details)
y(i, 1) = y_low + (y_high - y_low) * (x_temp - x_low) / (x_high - x_low)
Next i
Loop While i_high < N
ry.Value = y 'put array back into worksheet
Next j
End Sub
and you will see values filled in like this:
Date(days) Time(hours) Data1 Data2 Data3 Data4 Data5 Data6
40684 0.385670139 6.153512677 12.45628511 2.049620821 0.012712923 0.132418943 0.133139679
40684 0.385675069 6.153512677 12.45628511 5.295565628 0.012712923 0.132418943 0.133139679
40684 0.385680012 6.153512677 12.45628511 8.550069731 0.012712923 0.132418943 0.101863791
40684 0.385684954 6.153512677 12.45628511 7.427790828 0.012712923 0.132418943 0.070594229
40684 0.385689884 6.153512677 12.45628511 6.308237045 0.012712923 0.132418943 0.039400597
40684 0.385694826 6.153512677 12.45628511 6.658440881 0.012712923 0.132418943 0.008131037
40684 0.385699769 6.153512677 12.45628511 7.00871559 0.008789793 0.132418943 0.009129735
40684 0.385704699 6.153512677 12.45628511 6.032137384 0.00487698 0.132418943 0.010125807
40684 0.385709641 6.261854761 12.45628511 5.053182136 0.021732662 0.132418943 0.011124303
40684 0.385714572 6.369955702 8.186932606 5.424253533 0.038550828 0.132418943 0.012120577
I believe I have come up with a way to do it in the cell. This formula will work from the second row down for time in column A, gappy data in column B, interpolated data in column C. Note that but fails on the last couple of rows as there may be no more values to find.
{=IF(ISBLANK(B2), C1 + (OFFSET(B2, IF(ISBLANK(B2), MATCH(1, IF(ISBLANK(B2:B$999), 0, 1), 0), B2), 0) - C1) / (OFFSET($A2, IF(ISBLANK(B2), MATCH(1, IF(ISBLANK(B2:B$999), 0, 1), 0), B2), 0) - $A1) * ($A2 - $A1), B2)}
精彩评论