Wednesday, October 13, 2010

How to validate a CSV file while importing to a SQL table

This post is based on How to import CSV or Text file to SQL table using SqlBulkCopy in C#, which contain all the explanation about the data import validating excising records and bulk import.

Validating file schema is the next hurdle. For this I have imported the Importer’s column mapping class to handle column type and column width. So that invalid file will be thrown out with an exception.
Based on the table, 
I add column mappings with column types and widths of text fields
///
/// Set column mappings
///
importer.DateTimeFormat = "dd-MMM-yyyy";
importer.ColumnMappings.Add(new ColumnMapping("Id""Id"DataType.Short));
importer.ColumnMappings.Add(new ColumnMapping("Name""Name"DataType.Text, 50));
importer.ColumnMappings.Add(new ColumnMapping("Grade""Grade"DataType.Text, 3));
 Will create a schema file like this,
When we input a CSV file in correct format like this
Importer will import the file. But an Invalid file like this will be thrown with an exception.

Full example
Mark-up:
<%@ Page Language="C#" %>
<html>
<head id="Head1" runat="server">
</head>
<body>
    <form id="form1" runat="server">
        <asp:FileUpload runat="server" ID="fuFile" />
        <asp:Button runat="server" ID="btnImport" OnClick="Import" Text="Import" />
        <asp:DropDownList runat="server" ID="ddlFileType">
            <asp:ListItem Value="CSV" Selected="True">CSV Delimited</asp:ListItem>
            <asp:ListItem Value="TAB">Tab Delimited</asp:ListItem>
        </asp:DropDownList>
    </form>
</body>
</html>
Code:
namespace ActiveTest
{
    public partial class Test : Page
    {
        protected void Import(object sender, EventArgs e)
        {
            String path = Server.MapPath("~/Data/");
            try
            {
                if (this.fuFile.HasFile)
                {
                    /// 
                    /// Upload file
                    ///
                    string fileName = this.fuFile.FileName;
                    string fullName = Path.Combine(path, fileName);
                    if (File.Exists(fullName)) File.Delete(fullName);
                    this.fuFile.PostedFile.SaveAs(fullName);
                    ///
                    /// Instantiate DelimitedDataImporter and set connection string
                    ///
                    DelimiterType type = DelimiterType.CsvDelimited;
                    if (this.ddlFileType.SelectedValue.Equals("TAB")) type = DelimiterType.TabDelimited;
                    DelimitedFileImporter importer = new DelimitedFileImporter(type);
                    importer.ConnectionString =
                        ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString;
                    ///
                    /// Find existing records and set filter statement [OPTIONAL]
                    ///
                    StringBuilder sb = new StringBuilder(string.Empty);
                    using (SqlConnection con = new SqlConnection(
                        ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString))
                    {
                        SqlCommand cmd = new SqlCommand("SELECT Id FROM Student", con);
                        con.Open();
                        SqlDataReader reader = cmd.ExecuteReader();
                        while (reader.Read())
                            sb.AppendFormat("{0},", reader.GetInt32(0).ToString());
                        reader.Close();
                        con.Close();
                    }
                    string ids = string.Empty;
                    if (!string.IsNullOrEmpty(sb.ToString())) ids = sb.ToString().Remove(sb.Length - 1, 1);
                    if (!string.IsNullOrEmpty(ids)) importer.Filter =
                        string.Format("Id NOT IN ({0})", ids);
                    ///
                    /// Set column mappings
                    ///
                    importer.DateTimeFormat = "dd-MMM-yyyy";
                    importer.ColumnMappings.Add(new ColumnMapping("Id""Id"DataType.Short));
                    importer.ColumnMappings.Add(new ColumnMapping("Name""Name"DataType.Text, 50));
                    importer.ColumnMappings.Add(new ColumnMapping("Grade""Grade"DataType.Text, 3));
                    ///
                    /// Import data to sql table
                    ///
                    importer.ImportToDatabase(fullName, "Student");
                }
            }
            catch
            {
                throw new Exception("Import Faild");
            }
        }      
    }
 
    #region EventArgs Specification
 
    public enum DelimiterType
    {
        TabDelimited,
        CsvDelimited,
        CustomDelimited
    }
    public class ImportDelimitedEventArgs : EventArgs
    {
        private ReadOnlyCollection<object> content;
        private int lineNumber;
 
        public ImportDelimitedEventArgs(OleDbDataReader reader, int number)
        {
            object[] columns = new object[reader.FieldCount];
            reader.GetValues(columns);
            content = new ReadOnlyCollection<object>(columns);
            lineNumber = number;
        }
        public bool BreakImport { getset; }
        public int LineNumber
        {
            get { return lineNumber; }
        }
        public ReadOnlyCollection<object> Content
        {
            get { return content; }
        }
    }
 
    #endregion
 
    #region Mappings
 
    public class ColumnMapping
    {
        public string SourceColumn { getset; }
        public string DestinationColumn { getset; }
        public DataType Type { getset; }
        public int Width { getset; }
        public bool IsValidatble { getset; }
        public ColumnMapping(string sourceColumn, string destinationColumn)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
        }
        public ColumnMapping(string sourceColumn, string destinationColumn, DataType type)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
            this.Type = type;
            if (this.Type == DataType.Text)
                throw new ArgumentNullException("Width should be specified when the data type is Text");
            this.IsValidatble = true;
        }
        public ColumnMapping(string sourceColumn, string destinationColumn, DataType type, int width)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
            this.Type = type;
            this.Width = width;
            this.IsValidatble = true;
        }
    }
    public class ColumnMappingCollection : List<ColumnMapping>
    {
        public bool IsValidatable
        {
            get
            {
                foreach (ColumnMapping m in this) 
                    if (!m.IsValidatble) return false;
                return true;
            }
        }
        public bool HasDateFields
        {
            get
            {
                foreach (ColumnMapping m in thisif (m.Type == DataType.DateTime) return true;
                return false;
            }
        }
    }
 
    #endregion
 
    #region Types
 
    public enum DataType
    {
        Bit,
        Byte,
        Short,
        Long,
        Currency,
        Single,
        Double,
        DateTime,
        Text,
        Memo
    }
 
    #endregion
 
    #region Importer
 
    public class DelimitedFileImporter
    {
        #region Event handler
 
        public event EventHandler<ImportDelimitedEventArgs> ProcessLine;
        private bool HandleLine(OleDbDataReader reader, int number)
        {
            bool result = true;
            EventHandler<ImportDelimitedEventArgs> temp = ProcessLine;
            if (temp != null)
            {
                ImportDelimitedEventArgs args = new ImportDelimitedEventArgs(reader, number);
                temp(this, args);
                result = !args.BreakImport;
            }
            return result;
        }
 
        #endregion
 
        #region Constructors
 
        public DelimitedFileImporter()
            : this(DelimiterType.TabDelimited, null)
        {
            this.Initialize();
        }
        public DelimitedFileImporter(DelimiterType delimiter)
            : this(delimiter, null)
        {
            this.Initialize();
        }
        public DelimitedFileImporter(DelimiterType delimiterType, string delimiter)
        {
            this.Delimiter = delimiterType;
            this.CustomDelimiter = delimiter;
            this.Initialize();
        }
        private void Initialize()
        {
            this.ColumnMappings = new ColumnMappingCollection();
        }
 
        #endregion
 
        #region Properties
 
        public DelimiterType Delimiter { getset; }
        public string CustomDelimiter { get;set;}
        public string Filter { getset; }
        public string ConnectionString { getset; }
        public string DateTimeFormat { getset; }
        public ColumnMappingCollection ColumnMappings { getset; }
 
        #endregion
 
        #region Methods
 
        public void ImportToDatabase(string fileName, string tableName)
        {
            if (string.IsNullOrEmpty(this.ConnectionString))
                throw new ArgumentNullException("Connection String");
            FileInfo file = new FileInfo(fileName);
            WriteSchemaIniFile(file);
            using (OleDbConnection con = JetConnection(file))
            {
                using (OleDbCommand cmd = JetCommand(file, con))
                {
                    con.Open();
                    using (OleDbDataReader reader = cmd.ExecuteReader())
                    {
                        SqlBulkCopy sqlBulk = new SqlBulkCopy(this.ConnectionString);
                        sqlBulk.DestinationTableName = tableName;
                        foreach (ColumnMapping map in this.ColumnMappings)
                            sqlBulk.ColumnMappings.Add(map.DestinationColumn, map.SourceColumn);
                        sqlBulk.WriteToServer(reader);
                    }
                }
            }
        }
        private OleDbConnection JetConnection(FileInfo file)
        {
            StringBuilder connection = new StringBuilder("Provider=Microsoft.Jet.OLEDB.4.0");
            connection.AppendFormat(";Data Source=\"{0}\"", file.DirectoryName);
            connection.Append(";Extended Properties='text;HDR=Yes");
            if (this.Delimiter == DelimiterType.CustomDelimited)
            {
                if (this.CustomDelimiter == null)
                    throw new InvalidOperationException("Custom delimiter is not specified");
                connection.AppendFormat(";FMT=Delimited({1})"this.CustomDelimiter);
            }
            connection.Append("';");
            return new OleDbConnection(connection.ToString());
        }
        private OleDbCommand JetCommand(FileInfo file, OleDbConnection con)
        {
            StringBuilder commandText = new StringBuilder("SELECT * FROM ");
            commandText.AppendFormat("[{0}]", file.Name);
            if (this.Filter != null)
            {
                commandText.Append(" WHERE ");
                commandText.Append(this.Filter);
            }
            OleDbCommand cmd = new OleDbCommand(commandText.ToString(), con);
            cmd.CommandTimeout = 60000;
            return cmd;
        }
        private void WriteSchemaIniFile(FileInfo file)
        {
            string schema = Path.Combine(file.DirectoryName, "Schema.ini");
            if (!File.Exists(schema)) File.Delete(schema);
            using (StreamWriter writer = new StreamWriter(schema))
            {
                writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}]", file.Name));
                if (this.ColumnMappings.IsValidatable) writer.WriteLine("ColNameHeader=True");             
                switch (this.Delimiter)
                {
                    case DelimiterType.CustomDelimited:
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture,
                            "Format=Delimited({0})"this.CustomDelimiter));
                        break;
                    case DelimiterType.CsvDelimited:
                    case DelimiterType.TabDelimited:
                    default:
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture,
                            "Format={0}"this.Delimiter));
                        break;
                }
                if (this.ColumnMappings.HasDateFields)
                {
                    if (string.IsNullOrEmpty(this.DateTimeFormat))
                        throw new ArgumentNullException("DateTimeFormat missing");
                    else
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture, 
                            "DateTimeFormat={0}"this.DateTimeFormat));
                }
                if (this.ColumnMappings.IsValidatable)
                {
                    foreach (ColumnMapping m in this.ColumnMappings)
                    {
                        writer.WriteLine("Col{0}={1} {2}{3}", 
                            this.ColumnMappings.IndexOf(m) + 1, m.SourceColumn, 
                                m.Type, m.Type == DataType.Text ? " Width " + m.Width.ToString() : string.Empty);
                    }
                }
            }
        }
 
        #endregion
    }
 
    #endregion
}

1 comment:

Anonymous said...

Interesting code. Will test soon. Seems like lots of work has gone into it.

iPhone Launch Screen Sizes

iPhone Portrait iOS 8 Retina HT 5.5 = 1242 X 2208 Retna HD 4.7 = 750 X 1134 iPhone Landscape iOS 8 Retina HD 5.5  2208 X 1242 iPho...