开发者

how to import csv data into Oracle using c#

How to import csv data into Oracle using c#. Where data to be imported 3GB in size and number of rows 7512263. I've managed to import csv data into Oracle, but the time it takes about 1 hour. How to speed up the time it takes to import csv data into oracle. Thank you. This is my code:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Threading;
using System.Text.RegularExpressions;
using System.IO;
using FileHelpers;
using System.Data.OracleClient;


namespace sqlloader
{
    class Program
    {

        static void Main(string[] args)
        {
            int jum;
            int i;
            bool isFirstLine = false;
            FileHelperEngine engine = new FileHelperEngine(typeof(XL_XDR));

            //Connect To Database
            string constr = "Data Source=(DESCRIPTION=(ADDRESS_LIST="
                 + "(ADDRESS=(PROTOCOL=TCP)(HOST= pt-9a84825594af )(PORT=1521 )))"
                 + "(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=o11g)));"
                 + "User Id=xl;Password=rahasia;";
            OracleConnection con = new OracleConnection(constr);
            con.Open();



            // To Read Use:
            XL_XDR[] res = engine.ReadFile("DataOut.csv") as XL_XDR[];


            jum = CountLinesInFile("DataOut.csv");

            FileInfo f2 = new FileInfo("DataOut.csv");
            long s2 = f2.Length;
            int jmlRecord = jum - 1;

            for (i = 0; i < jum; i++)
            {
                ShowPercentProgress("Processing...", i, jum);
                Thread.Sleep(100);

                if (isFirstLine == false)
                {
                    isFirstLine = true;
                }
                else
                {
                    string sql = "INSERT INTO XL_XDR (XDR_ID, XDR_TYPE, SESSION_START_TIME, SESSION_END_TIME, SESSION_LAST_UPDATE_TIME, " +
                                 "SESSION_FLAG, VERSION, CONNECTION_ROW_COUNT, ERROR_CODE, METHOD, HOST_LEN, HOST, URL_LEN, URL, CONNECTION_START_TIME, " +
                                 "CONNECTION_LAST_UPDATE_TIME, CONNECTION_FLAG, CONNECTION_ID, TOTAL_EVENT_COUNT, TUNNEL_PAIR_ID, RESPONSIVENESS_TYPE, " +
                                 "CLIENT_PORT, PAYLOAD_TYPE, VIRTUAL_TYPE, VID_CLIENT, VID_SERVER, CLIENT_ADDR, SERVER_ADDR, CLIENT_TUNNEL_ADDR, " +
                                 "SERVER_TUNNEL_ADDR, ERROR_CODE_2, IPID, C2S_PKTS, C2S_OCTETS, S2C_PKTS, S2C_OCTETS, NUM_SUCC_TRANS, CONNECT_TIME, " +
                                 "TOTAL_RESP, TIMEOUTS, RETRIES, RAI, TCP_SYNS, TCP_SYN_ACKS, TCP_SYN_RESETS, TCP_SYN_FINS, EVENT_TYPE, FLAGS, TIME_STAMP, " +
                                 "EVENT_ID, EVENT_CODE) VALUES (" +
                                 "'" + res[i].XDR_ID + "', '" + res[i].XDR_TYPE + "', '" + res[i].SESSION_START_TIME + "', '" + res[i].SESSION_END_TIME + "', " +
                                 "'" + res[i].SESSION_LAST_UPDATE_TIME + "', '" + res[i].SESSION_FLAG + "', '" + res[i].VERSION + "', '" + res[i].CONNECTION_ROW_COUNT + "', " +
                                 "'" + res[i].ERROR_CODE + "', '" + res[i].METHOD + "', '" + res[i].HOST_LEN + "', '" + res[i].HOST + "', " +
                                 "'" + res[i].URL_LEN + "', '" + res[i].URL + "', '" + res[i].CONNECTION_START_TIME + "', '" + res[i].CONNECTION_LAST_UPDATE_TIME + "', " +
                                 "'" + res[i].CONNECTION_FLAG + "', '" + res[i].CONNECTION_ID + "', '" + res[i].TOTAL_EVENT_COUNT + "', '" + res[i].TUNNEL_PAIR_ID + "', " +
                                 "'" + res[i].RESPONSIVENESS_TYPE + "', '" + res[i].CLIENT_PORT + "', '" + res[i].PAYLOAD_TYPE + "', '" + res[i].VIRTUAL_TYPE + "', " +
                                 "'" + res[i].VID_CLIENT + "', '" + res[i].VID_SERVER + "', '" + res[i].CLIENT_ADDR + "', '" + res[i].SERVER_ADDR + "', " +
                                 "'" + res[i].CLIENT_TUNNEL_ADDR + "', '" + res[i].SERVER_TUNNEL_ADDR + "', '" + res[i].ERROR_CODE_2 + "', '" + res[i].IPID + "', " +
                                 "'" + res[i].C2S_PKTS + "', '" + res[i].C2S_OCTETS + "', '" + res[i].S2C_PKTS + "', '" + res[i].S2C_OCTETS + "', " +
                                 "'" + res[i].NUM_SUCC_TRANS + "', '" + res[i].CONNECT_TIME + "', '" + res[i].TOTAL_RESP + "', '" + res[i].TIMEOUTS + "', " +
                                 "'" + res[i].RETRIES + "', '" + res[i].RAI + "', '" + res[i].TCP_SYNS + "', '" + res[i].TCP_SYN_ACKS + "', " +
                                 "'" + res[i].TCP_SYN_RESETS + "', '" + res[i].TCP_SYN_FINS + "', '" + res[i].EVENT_TYPE + "', '" + res[i].FLAGS + "', " +
                                 "'" + res[i].TIME_STAMP + "', '" + res[i].EVENT_ID + "', '" + res[i].EVENT_CODE + "')";

                    OracleCommand command = new OracleCommand(sql, con);
                    command.ExecuteNonQuery();

                }




            }

            Console.WriteLine("Successfully Inserted");
            Console.WriteLine();
            Console.WriteLine("Number of Row Data: " + jmlRecord.ToString());
            Console.WriteLine();
            Console.WriteLine("The size of {0} is {1} bytes.", f2.Name, f2.Length);
            con.Close();




        }

        static void ShowPercentProgress(string message, int currElementIndex, int totalElementCount)
        {
            if (currElementIndex < 0 || currElementIndex >= totalElementCount)
            {
  开发者_开发百科              throw new InvalidOperationException("currElement out of range");
            }
            int percent = (100 * (currElementIndex + 1)) / totalElementCount;
            Console.Write("\r{0}{1}% complete", message, percent);
            if (currElementIndex == totalElementCount - 1)
            {
                Console.WriteLine(Environment.NewLine);
            }
        }

        static int CountLinesInFile(string f)
        {
            int count = 0;
            using (StreamReader r = new StreamReader(f))
            {
                string line;
                while ((line = r.ReadLine()) != null)
                {
                    count++;
                }
            }
            return count;
        }

    }

    [DelimitedRecord(",")]
    public class XL_XDR
    {
        public string XDR_ID;
        public string XDR_TYPE;
        public string SESSION_START_TIME;
        public string SESSION_END_TIME;
        public string SESSION_LAST_UPDATE_TIME;
        public string SESSION_FLAG;
        public string VERSION;
        public string CONNECTION_ROW_COUNT;
        public string ERROR_CODE;
        public string METHOD;
        public string HOST_LEN;
        public string HOST;
        public string URL_LEN;
        public string URL;
        public string CONNECTION_START_TIME;
        public string CONNECTION_LAST_UPDATE_TIME;
        public string CONNECTION_FLAG;
        public string CONNECTION_ID;
        public string TOTAL_EVENT_COUNT;
        public string TUNNEL_PAIR_ID;
        public string RESPONSIVENESS_TYPE;
        public string CLIENT_PORT;
        public string PAYLOAD_TYPE;
        public string VIRTUAL_TYPE;
        public string VID_CLIENT;
        public string VID_SERVER;
        public string CLIENT_ADDR;
        public string SERVER_ADDR;
        public string CLIENT_TUNNEL_ADDR;
        public string SERVER_TUNNEL_ADDR;
        public string ERROR_CODE_2;
        public string IPID;
        public string C2S_PKTS;
        public string C2S_OCTETS;
        public string S2C_PKTS;
        public string S2C_OCTETS;
        public string NUM_SUCC_TRANS;
        public string CONNECT_TIME;
        public string TOTAL_RESP;
        public string TIMEOUTS;
        public string RETRIES;
        public string RAI;
        public string TCP_SYNS;
        public string TCP_SYN_ACKS;
        public string TCP_SYN_RESETS;
        public string TCP_SYN_FINS;
        public string EVENT_TYPE;
        public string FLAGS;
        public string TIME_STAMP;
        public string EVENT_ID;
        public string EVENT_CODE;


    }
}


I've had luck using the bulk binding method in ODB.NET. Do a search for "ArrayBindCount" on this page.

http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx

This will allow you to insert everything at once, instead of thousands of tiny inserts. The hours you're currently seeing will turn to minutes.


While strongly seconding Vincent's suggestion to use bind variables (which is likely going to be a huge performance gain as well as preventing the DBA from throttling you for crushing the shared pool) and Brosto's suggestion to do bulk binds, I would tend to question why you would write this sort of thing in C# in the first place. It would be much more efficient to do something like use external tables so that your application causes a file to be placed on the database server's file system and the parsing and loading of the data is done by the external table definition (or even having your application invoke SQL*Loader). That lets you leverage the code Oracle has already optimized for this sort of processing.

Additionally, since you are counting the number of lines in the file once at the outset in order to display a progress bar, you can optimize that by getting an approximate row count from more easily obtained information. You know roughly how many bytes per line in the file and the size of a file is a relatively easy file attribute to retrieve. That should allow you to estimate the number of lines in the file relatively accurately which should be more than enough for a progress bar.


just removing the call to CountLinesInFile would probably help because in that method you are reading all file line by line and as you say the file is biiig...


Use a bulk insert to load fast:

Insert Bulk Data From CSV File To Database Table Using SQLBulkCopy Class

Or

Use a SQL*Loader utility:

Loading CSV file data into a Table Variable (Index-by table) in Oracle

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜