This article shows how you can read a large, complex, nested XML file using XmlReader.
A few benefits of using XmlReader:
- XmlReader reads data in a forward-only, read-only fashion. That’s why it is faster than loading the whole XML document into memory and then reading it node by node.
- XmlReader reads the document sequentially, one node at a time (not literally line by line).
- You can use ReadState property to check the current state of the reader.
- You can define XmlReaderSettings to customize the reader's behavior (see the example below).
Sample Xml
<?xml version="1.0" encoding="UTF-8"?>
<cafProductFeed>
<datafeed id="7858" merchantId="7858" merchantName="SampleMerchant">
<prod id="672553505" in_stock="yes" is_for_sale="yes" pre_order="no" stock_quantity="2" web_offer="no">
<brand/>
<cat>
<mCat>B2C_2</mCat>
</cat>
<price curr="GBP">
<buynow>33.99</buynow>
<rrp>0.00</rrp>
<store>0.00</store>
</price>
<text>
<name>Laptop Memory</name>
</text>
<uri>
<track>link</track>
<mImage>link</mImage>
<mLink>link</mLink>
</uri>
<vertical/>
<pId>387912</pId>
<delTime>Free Delivery</delTime>
<lastUpdated>2020-11-02 09:58:02</lastUpdated>
<mpn>KVR16S11/8</mpn>
</prod>
<!-- more prod nodes -->
</datafeed>
</cafProductFeed>
Steps:
- Define XmlReaderSettings (optional)
- Create the XmlReader (XmlReader.Create)
- Read the document node by node (reader.Read())
Read attributes
As mentioned above, the reader moves through the document one node at a time. While the reader is positioned on an element, you can get that element's attributes with GetAttribute. For example:
reader.GetAttribute("in_stock")
Read Sub nodes
// Check the node type as well as the name so that only the opening <pId> tag matches.
if (reader.NodeType == XmlNodeType.Element && reader.Name == "pId")
{
    // ReadElementContentAsString returns the element's text with entities and CDATA
    // decoded (ReadInnerXml would return the raw markup instead). Like ReadInnerXml,
    // it leaves the reader positioned on the node that follows </pId>, so do not call
    // reader.Read() again before inspecting that node.
    product.MerchantProdId = reader.ReadElementContentAsString();
}
Example code
/// <summary>
/// Streams the product feed at <paramref name="filePath"/> with an <see cref="XmlReader"/>
/// and returns one <see cref="Product"/> for every usable &lt;prod&gt; element.
/// </summary>
/// <remarks>
/// A product is kept only when its <c>id</c> attribute parses to a positive number and at
/// least one of Mpn, Ean or Upc is non-empty. Each product carries the merchant id and
/// name of the &lt;datafeed&gt; element that was most recently opened before it.
/// </remarks>
/// <param name="filePath">Path or URI of the XML feed to read.</param>
/// <returns>The usable products, in document order.</returns>
/// <exception cref="FormatException">A non-empty price element is not a valid number.</exception>
/// <exception cref="XmlException">The feed is not well-formed, or a text element contains child elements.</exception>
public static List<Product> Products(string filePath)
{
    XmlReaderSettings settings = new XmlReaderSettings();
    // NOTE(review): DtdProcessing.Parse lets a DOCTYPE inside the feed be processed. For feeds
    // from untrusted sources prefer DtdProcessing.Prohibit (or Ignore) to avoid
    // entity-expansion attacks. Left unchanged so feeds that declare a DTD keep loading.
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.IgnoreWhitespace = true;

    var results = new List<Product>();
    // Placeholder that absorbs child elements seen before the first <prod>. It is never
    // given a positive SupplierProdId here, so AddIfIdentifiable is expected to skip it.
    var product = new Product();
    var merchantName = string.Empty;
    int merchantId = 0;

    using (XmlReader reader = XmlReader.Create(filePath, settings))
    {
        // Every branch below advances the reader exactly once. The content-reading calls
        // (ReadElementContentAsString) already leave the reader on the node after the
        // closing tag, so they must NOT be followed by Read(), otherwise the next sibling
        // element would be skipped.
        while (!reader.EOF)
        {
            if (reader.NodeType != XmlNodeType.Element)
            {
                reader.Read();
                continue;
            }

            switch (reader.Name)
            {
                case "datafeed":
                {
                    int.TryParse(reader.GetAttribute("merchantId"), out merchantId);
                    merchantName = reader.GetAttribute("merchantName");
                    reader.Read();
                    break;
                }

                case "prod":
                {
                    // A new <prod> starts: flush the product collected so far.
                    AddIfIdentifiable(results, product);

                    long id;
                    long.TryParse(reader.GetAttribute("id"), out id);
                    int stock;
                    int.TryParse(reader.GetAttribute("stock_quantity"), out stock);

                    product = new Product
                    {
                        SupplierProdId = id,
                        InStock = reader.GetAttribute("in_stock") == "yes",
                        IsForSale = reader.GetAttribute("is_for_sale") == "yes",
                        PreOrder = reader.GetAttribute("pre_order") == "yes",
                        WebOffer = reader.GetAttribute("web_offer") == "yes",
                        StockQuantity = stock,
                        // Bound at creation so a later <datafeed> cannot overwrite them.
                        MerchantId = merchantId,
                        MerchantName = merchantName
                    };
                    reader.Read();
                    break;
                }

                case "pId":
                {
                    product.MerchantProdId = reader.ReadElementContentAsString();
                    break;
                }

                case "mpn":
                case "ean":
                case "upc":
                {
                    // Capture the element name first: reading the content moves the reader.
                    string field = reader.Name;
                    string code = reader.ReadElementContentAsString();

                    long numeric;
                    bool isPositiveNumber = long.TryParse(code, out numeric) && numeric > 0;

                    if (!isPositiveNumber)
                    {
                        // Anything that is not a positive integer is treated as an MPN.
                        product.Mpn = code;
                    }
                    else if (field == "upc")
                    {
                        product.Upc = code;
                    }
                    else
                    {
                        // A purely numeric <mpn> or <ean> value is stored as the EAN.
                        product.Ean = code;
                    }
                    break;
                }

                case "track":
                {
                    product.Url = reader.ReadElementContentAsString();
                    break;
                }

                case "name":
                {
                    product.Name = reader.ReadElementContentAsString();
                    break;
                }

                case "price":
                {
                    product.Currency = reader.GetAttribute("curr");
                    reader.Read();
                    break;
                }

                case "buynow":
                {
                    product.Price = ParsePrice(reader.ReadElementContentAsString());
                    break;
                }

                case "rrp":
                {
                    product.RetailPrice = ParsePrice(reader.ReadElementContentAsString());
                    break;
                }

                case "store":
                {
                    product.StorePrice = ParsePrice(reader.ReadElementContentAsString());
                    break;
                }

                default:
                {
                    reader.Read();
                    break;
                }
            }
        }
    }

    // The last <prod> in the file has no following <prod> to trigger the flush above.
    AddIfIdentifiable(results, product);
    return results;
}

/// <summary>
/// Adds <paramref name="product"/> to <paramref name="results"/> when it has a positive
/// supplier id and at least one non-empty identifier (Mpn, Ean or Upc).
/// </summary>
private static void AddIfIdentifiable(List<Product> results, Product product)
{
    bool hasIdentifier = !string.IsNullOrEmpty(product.Mpn)
        || !string.IsNullOrEmpty(product.Ean)
        || !string.IsNullOrEmpty(product.Upc);

    if (product.SupplierProdId > 0 && hasIdentifier)
    {
        results.Add(product);
    }
}

/// <summary>
/// Parses a price using the invariant culture so "33.99" means the same on every machine.
/// An empty or whitespace-only value yields 0; any other invalid text throws
/// <see cref="FormatException"/>.
/// </summary>
private static double ParsePrice(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return 0d;
    }

    return Convert.ToDouble(text, System.Globalization.CultureInfo.InvariantCulture);
}
It would be helpful if you could post a link to a GitHub repository with the source code.