Tuesday 6 August 2013

Extract All Email from a URL and Save it in a Text File using ASP.Net C#


Extract All Email from a URL and Save it in a Text File using ASP.Net C#


So today I am going to discuss how we can extract all the email IDs present on a particular web page, given its URL, using ASP.NET (C#).

Create a Form Design like as given below..



<!-- Extract-emails demo page.
     TextBox1 takes the URL to scan; Button1 fires Button1_Click in the
     code-behind (class ExtractEmail), which runs the extraction; lblMsg
     shows the success or error message. Control IDs and the OnClick name
     must match the code-behind exactly. -->
<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
    <title>Extract Email From URL</title>
</head>
<body>
    <form id="form1" runat="server">
    <div>
    <h1>Extract Email By URL ~~ By Vishal Ranjan</h1>
        <br />
        <br />
        <br />
        <asp:Label ID="Label1" runat="server" Font-Bold="True" Text="Enter URL:"></asp:Label>
        <!-- URL input; read (and cleared) by the code-behind -->
        <asp:TextBox ID="TextBox1" runat="server"></asp:TextBox><br />
        <br />
        <br />
        <br />
        <br />
        <!-- Triggers the extraction; handler lives in the code-behind -->
        <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="Search" /><br />
        <br />
        <br />
        <br />
        <!-- Status/error message set by the code-behind -->
        <asp:Label ID="lblMsg" runat="server" Font-Bold="True" ForeColor="Red"></asp:Label></div>
    </form>
</body>
</html>

Then create a class file to extract the emails, as follows. Name the class file GetEmails.cs.


using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace FindAllEmails
{
    /// <summary>
    /// Downloads a web page and extracts every email address found in it,
    /// appending each match to a log file on disk.
    /// </summary>
    public class GetEmails
    {
        // Log file the matches are appended to. The original used
        // "D:matchlog.txt" (no backslash), which is a drive-relative path and
        // does not match the "D:\matchlog.txt" location shown to the user.
        private const string LogFile = @"D:\matchlog.txt";

        // Email pattern: a local part, "@", then either a dotted-quad IP
        // address or a domain name ending in a 2-4 letter TLD.
        // Cached as static readonly + Compiled so it is built once, not on
        // every request.
        private static readonly Regex EmailRegex = new Regex(
            @"(([\w-]+\.)+[\w-]+|([a-zA-Z]{1}|[\w-]{2,}))@" +
            @"((([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\." +
            @"([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])\.([0-1]?[0-9]{1,2}|25[0-5]|2[0-4][0-9])){1}|" +
            @"([a-zA-Z]+[\w-]+\.)+[a-zA-Z]{2,4})",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Public entry point: fetches <paramref name="webPage"/> and logs
        /// every email address found in its content.
        /// </summary>
        /// <param name="webPage">Absolute URL (including http://) of the page to scan.</param>
        public void RetrieveEmails(string webPage)
        {
            GetAllEmails(RetrieveContent(webPage));
        }

        // Downloads and returns the raw content of the page at the given URL.
        // Network/HTTP failures (e.g. WebException) propagate to the caller
        // with their original stack trace intact.
        private string RetrieveContent(string webPage)
        {
            // Create a request object for the URL passed in.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webPage);
            request.Timeout = 10000; // 10-second timeout so a dead host cannot hang the page

            // using-blocks guarantee the response and reader are closed even
            // on failure. The original closed them in a finally block without
            // null checks, so a failed GetResponse() caused a
            // NullReferenceException that masked the real error; it also
            // rethrew with "throw ex;", which resets the stack trace.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader respStream = new StreamReader(response.GetResponseStream()))
            {
                return respStream.ReadToEnd();
            }
        }

        // Scans the page content with the email regex and appends every
        // match to the log file, one line per match.
        private void GetAllEmails(string content)
        {
            Match match = EmailRegex.Match(content);
            while (match.Success)
            {
                WriteToLog(LogFile, "Email match: " + match.Groups[0].Value + Environment.NewLine);
                match = match.NextMatch();
            }
        }

        // Appends a timestamped message to the given log file.
        private void WriteToLog(string file, string message)
        {
            using (StreamWriter w = File.AppendText(file))
            {
                // using disposes the writer; the original's extra w.Close()
                // was redundant.
                w.WriteLine(DateTime.Now.ToString() + ": " + message);
            }
        }
    }
}

Finally, in the code-behind, create an object of that class by referencing the corresponding namespace.


using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using FindAllEmails;

/// <summary>
/// Code-behind for the extract-emails demo page. Wires the Search button to
/// the GetEmails extractor and reports the outcome in lblMsg.
/// </summary>
public partial class ExtractEmail : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
        // Nothing to initialize; all work is driven by the button click.
    }

    /// <summary>
    /// Reads the URL from TextBox1, extracts every email on that page into
    /// the log file via GetEmails, and shows a success or error message.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        try
        {
            string url = TextBox1.Text.Trim();
            GetEmails ge = new GetEmails();
            ge.RetrieveEmails(url);
            TextBox1.Text = "";
            // Fixed user-facing typo: "Retreived" -> "Retrieved".
            lblMsg.Text = @"Emails Retrieved and Saved to D:\matchlog.txt";
        }
        catch (Exception ex)
        {
            // Surface any failure (bad URL, network error, file access) to
            // the user. Fixed typo: "Occured" -> "Occurred".
            lblMsg.Text = "The following Error Just Occurred: " + ex.Message;
        }
    }
}

Once you have done this, we are all set to go. Just enter the URL (including the http:// prefix) in the corresponding textbox and click the Search button. All the emails present on the associated page will be written to the file matchlog.txt on the D drive. You can change the path of the file to wherever you want.

This program could be used to build a web service that extracts all emails from a page, builds a database of those emails, and then auto-sends messages to them at a scheduled interval. Just a thought. :-)

2 comments:

  1. awesome article, thanks

    ReplyDelete
  2. I take pleasure in, result in I found just what I was having
    a look for. You've ended my four day long hunt! God bless you, man.
    Have a nice day. Bye

    ReplyDelete

Thank You for Your Comments. We will get back to you soon.

back to top