- Get link
- X
- Other Apps
using System;
using System.IO;
using System.Net;
using System.Text;
using System.ComponentModel;
using System.Collections;
namespace capp1
{
class HttpGetClass
{
public Uri ResourceUri;
public IWebProxy HttpProxy;
public string LocalSavePath;
public bool RecursiveGet;
public ArrayList RetrievedUri;
/// <summary>
/// Program entry point. Prints the usage text, applies the command-line options to a
/// new HttpGetClass instance and then downloads the configured resource.
/// </summary>
/// <param name="args">Command-line arguments; options start with '-' or '/'.</param>
static void Main(string[] args)
{
    HttpGetClass httpGet = new HttpGetClass();
    usage();
    // Parse the command line
    for (int i = 0; i < args.Length; i++)
    {
        string option = args[i];
        // An empty argument carries no option at all: treat it as a usage error.
        if (option.Length == 0)
        {
            usage();
            return;
        }
        // Arguments that are not options are ignored.
        if (option[0] != '-' && option[0] != '/')
        {
            continue;
        }
        // A bare "-" or "/" has no option letter.
        if (option.Length < 2)
        {
            usage();
            return;
        }
        char flag = Char.ToLowerInvariant(option[1]);
        if (flag == 'r')
        {
            // Retrieve all referenced images and text on the same host
            httpGet.RecursiveGet = true;
            continue;
        }
        if (flag != 'u' && flag != 'p' && flag != 's')
        {
            usage();
            return;
        }
        // The remaining options all consume the next argument as their value.
        if (i + 1 >= args.Length)
        {
            usage();
            return;
        }
        string value = args[++i];
        try
        {
            switch (flag)
            {
                case 'u':
                    // URI to download (get)
                    httpGet.ResourceUri = new Uri(value);
                    break;
                case 'p':
                    // Name of proxy server to use
                    httpGet.HttpProxy = new WebProxy(value);
                    break;
                case 's':
                    // Local save path to append to retrieved resources
                    httpGet.LocalSavePath = value;
                    break;
            }
        }
        catch (UriFormatException)
        {
            // The URI or proxy address could not be parsed.
            usage();
            return;
        }
    }
    // Initialize the proxy server and retrieve the resources
    Console.WriteLine("Initializing the proxy server and retrieve the resources...");
    try
    {
        httpGet.GetResource(httpGet.ResourceUri, httpGet.RecursiveGet);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception occurred: {0}", ex.ToString());
    }
}
/// <summary>
/// Initializes every field with the default used when the matching
/// command-line option is not supplied.
/// </summary>
public HttpGetClass()
{
    // Nothing has been downloaded yet, and following links is opt-in.
    this.RetrievedUri = new ArrayList();
    this.RecursiveGet = false;
    // Save into the current directory and go through the default web proxy.
    this.LocalSavePath = ".";
    this.HttpProxy = WebRequest.DefaultWebProxy;
    // Resource fetched when no URI is given. Other example URIs:
    //   http://www.cuilimg.com/static/v2/images/new/flattened/cuil-home_id.png
    //   http://www.google.com/index.html
    //   http://www.cuil.com/search?q=MFC%20programming%20tutorials
    this.ResourceUri = new Uri("http://www.google.com/intl/en/images/about_logo.gif");
}
/// <summary>
/// Prints the command-line syntax and the list of supported options to the console.
/// </summary>
static void usage()
{
    // The summary line lists every option parsed by Main, including -r.
    Console.WriteLine("Usage: Executable_file_name [-u URI] [-p proxy] [-r] [-s local-path]");
    Console.WriteLine("Available options:");
    Console.WriteLine(" -u URI URI to download (along with linked content)");
    Console.WriteLine(" -p proxy Name of proxy server");
    Console.WriteLine(" -r Retrieve referenced images and text residing");
    Console.WriteLine(" on the same host.");
    Console.WriteLine(" -s local-path Local path to save content at");
    Console.WriteLine();
}
/// <summary>
/// Creates any subdirectories and opens the file for writing to. The directory
/// segments of the URI are mapped to subdirectories of the local save path and the
/// last segment becomes the file name. URIs that name a directory (or only the host)
/// are saved as "default.html".
/// </summary>
/// <param name="localSavePath">Local path to append saved resources paths to</param>
/// <param name="uriName">URI of destination resource being saved</param>
/// <returns>
/// An open, write-only FileStream, or null when the directory or file could not be
/// created (the failure is reported on the console).
/// </returns>
public FileStream CreateFile(string localSavePath, Uri uriName)
{
    FileStream localFile = null;
    try
    {
        string directory = localSavePath;
        string fileName = "default.html";
        bool usingDefaultName = true;
        // Segments ending in '/' are directories (the first one is just the root "/");
        // only the last segment can be a file name.
        foreach (string segment in uriName.Segments)
        {
            string name = ToLocalName(segment);
            if (segment.EndsWith("/", StringComparison.Ordinal))
            {
                if (name.Length > 0)
                    directory = Path.Combine(directory, name);
            }
            else
            {
                fileName = name;
                usingDefaultName = false;
            }
        }
        if (usingDefaultName)
            Console.WriteLine("Else, using defaults...");
        Console.WriteLine("Creating local directory...");
        Directory.CreateDirectory(directory);
        // Open the file to write to
        Console.WriteLine("Opening the file to write to...");
        string saveFileName = Path.Combine(directory, fileName);
        localFile = File.Open(
            saveFileName,
            System.IO.FileMode.Create,
            System.IO.FileAccess.Write,
            System.IO.FileShare.None
        );
        Console.WriteLine("Created File: {0}", saveFileName);
    }
    catch (Exception ex)
    {
        Console.WriteLine("CreateFile failed: {0}", ex.Message);
        Console.WriteLine("Stack:\n{0}", ex.StackTrace);
        // Never hand a closed stream back to the caller.
        if (localFile != null)
        {
            localFile.Close();
            localFile = null;
        }
    }
    return localFile;
}
/// <summary>
/// Converts one URI path segment into a name that is safe to use as a single local
/// directory or file name.
/// </summary>
/// <param name="uriSegment">A segment from Uri.Segments, with or without trailing '/'.</param>
/// <returns>The unescaped name; empty for the root segment "/".</returns>
private static string ToLocalName(string uriSegment)
{
    string name = Uri.UnescapeDataString(uriSegment.TrimEnd('/'));
    // Unescaping can reveal "." / ".." or separator characters (e.g. from %2F);
    // neutralize them so a remote link cannot escape the save directory.
    if (name == "." || name == "..")
        return "_";
    foreach (char invalid in Path.GetInvalidFileNameChars())
        name = name.Replace(invalid, '_');
    return name;
}
/// <summary>
/// Parse the HTML content for any links to images and text/HTML documents that
/// reside on the same host. For each such link that has not been retrieved yet,
/// call the GetResource method to download it (recursively).
/// </summary>
/// <param name="baseUri">URI of resource being parsed for links</param>
/// <param name="httpContent">Text content of retrieved resource</param>
public void ParseHtmlForReferences(Uri baseUri, string httpContent)
{
    string[] match = { "<a href=\"", "src=\"" };
    // Search for '<a href="' and 'src="' strings to indicate links to other resources.
    Console.WriteLine("Searching for \'<a href=\"\' and \'src=\"\' strings to indicate links to other resources...");
    for (int i = 0; i < match.Length; i++)
    {
        int start = 0;
        // Search the entire contents of the buffer for occurrences of each match string
        while (true)
        {
            // Find the next reference; HTML attribute names are case-insensitive.
            start = httpContent.IndexOf(match[i], start, StringComparison.OrdinalIgnoreCase);
            if (start == -1)
                break; // Break if not present
            // Offset start to end of match string (so it points to the resource link)
            start = start + match[i].Length;
            // The link runs up to the closing quote
            int end = httpContent.IndexOf('"', start);
            if (end == -1)
                break;
            string link = httpContent.Substring(start, end - start);
            // Skip links that cannot be resolved instead of aborting the whole parse.
            Uri newUri;
            if (!Uri.TryCreate(baseUri, link, out newUri))
                continue;
            // GetResource issues HTTP requests only.
            if (newUri.Scheme != Uri.UriSchemeHttp && newUri.Scheme != Uri.UriSchemeHttps)
                continue;
            if (!string.Equals(newUri.Host, baseUri.Host, StringComparison.OrdinalIgnoreCase))
                continue;
            if (this.RetrievedUri.Contains(newUri))
                continue;
            try
            {
                // Link is hosted on the same computer, download it
                GetResource(newUri, true);
                Console.WriteLine("\n");
            }
            catch (WebException)
            {
                // GetResource has already reported the failure; one broken link
                // should not stop the remaining links from being retrieved.
            }
        }
    }
}
/// <summary>
/// Retrieve the resource at the given URI through the configured HttpProxy and save
/// it under LocalSavePath. HTML documents are saved as UTF-8 text and, when requested,
/// parsed for further links; every other content type is copied byte for byte.
/// </summary>
/// <param name="getUri">URI to retrieve</param>
/// <param name="recurse">True to also retrieve same-host resources referenced by HTML content</param>
public void GetResource(Uri getUri, bool recurse)
{
    HttpWebResponse httpResponse = null;
    FileStream localFile = null;
    try
    {
        RetrievedUri.Add(getUri);
        Console.WriteLine("Retrieving: {0}", getUri.AbsoluteUri);
        // Create the HTTP request object
        Console.WriteLine("Creating the HTTP request object...");
        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(getUri.AbsoluteUri);
        // Set some HTTP specific headers
        Console.WriteLine("Setting some HTTP specific headers...");
        httpRequest.UserAgent = "My User Agent/1.0";
        // Use the configured proxy (the default proxy unless -p was given)
        Console.WriteLine("If a proxy was specified create an instance of the WebProxy with it...");
        httpRequest.Proxy = HttpProxy;
        // Get the response object
        Console.WriteLine("Getting the response object...");
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
        // Create the file where resource is to be saved
        Console.WriteLine("Creating the file where resource is to be saved...");
        localFile = CreateFile(LocalSavePath, getUri);
        if (localFile == null)
        {
            // CreateFile has already reported why; skip this resource.
            return;
        }
        Console.WriteLine("Response Type: {0}", httpResponse.ContentType);
        string contentType = httpResponse.ContentType ?? string.Empty;
        // The header usually carries parameters ("text/html; charset=utf-8"),
        // so compare only the media-type prefix.
        if (contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
        {
            // If the resource is HTML text, retrieve using the text stream reader.
            Console.WriteLine("If the resource is HTML text, retrieve using the text stream reader...");
            string httpContent;
            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream(), Encoding.UTF8))
            {
                httpContent = reader.ReadToEnd();
            }
            // Write with the same encoding used for reading so non-ASCII text survives.
            byte[] byteContent = Encoding.UTF8.GetBytes(httpContent);
            localFile.Write(byteContent, 0, byteContent.Length);
            // Close the file before recursing: it is no longer needed and the
            // recursive calls open files of their own.
            localFile.Close();
            localFile = null;
            if (recurse)
            {
                ParseHtmlForReferences(getUri, httpContent);
            }
        }
        else
        {
            // Images and every other content type are copied as raw bytes.
            Console.WriteLine("If this resource is a binary, retrieve the content using the binary writer...");
            long totalBytes = httpResponse.ContentLength;
            long byteCount = 0;
            byte[] chunk = new byte[4096];
            // Read the response in 4KB chunks
            Console.Write("Reading the response in 4KB chunks: ");
            using (Stream body = httpResponse.GetResponseStream())
            {
                int read;
                while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                {
                    localFile.Write(chunk, 0, read);
                    byteCount += read;
                    // Print progress indicator; the length is -1 when the server
                    // does not announce it, so no percentage can be computed then.
                    if (totalBytes > 0)
                    {
                        long progress = (byteCount * 100) / totalBytes;
                        Console.Write(@"{0}%", progress.ToString().PadLeft(2));
                        Console.Write("\b\b\b");
                    }
                }
            }
            Console.WriteLine();
        }
    }
    catch (WebException wex)
    {
        Console.WriteLine("Exception occurred on request: {0}", wex.Message);
        Console.WriteLine("Status code: {0}", wex.Status);
        // Only inspect the error response when this call's own request failed
        // (httpResponse still null). Otherwise the exception came from a nested
        // call that has already reported and closed it.
        if (wex.Status == WebExceptionStatus.ProtocolError && httpResponse == null)
        {
            // If there was a protocol error then the response object is
            // valid but there was an error retrieving the response.
            HttpWebResponse errorResponse = wex.Response as HttpWebResponse;
            if (errorResponse != null)
            {
                Console.WriteLine("\nThe protocol returned was: {0}", errorResponse.StatusCode.ToString());
                errorResponse.Close();
            }
        }
        throw;
    }
    finally
    {
        // Close the resources if still open
        if (localFile != null)
            localFile.Close();
        if (httpResponse != null)
            httpResponse.Close();
    }
}
}
}
using System.IO;
using System.Net;
using System.Text;
using System.ComponentModel;
using System.Collections;
namespace capp1
{
class HttpGetClass
{
public Uri ResourceUri;
public IWebProxy HttpProxy;
public string LocalSavePath;
public bool RecursiveGet;
public ArrayList RetrievedUri;
/// <summary>
/// Program entry point. Prints the usage text, applies the command-line options to a
/// new HttpGetClass instance and then downloads the configured resource.
/// </summary>
/// <param name="args">Command-line arguments; options start with '-' or '/'.</param>
static void Main(string[] args)
{
    HttpGetClass httpGet = new HttpGetClass();
    usage();
    // Parse the command line
    for (int i = 0; i < args.Length; i++)
    {
        string option = args[i];
        // An empty argument carries no option at all: treat it as a usage error.
        if (option.Length == 0)
        {
            usage();
            return;
        }
        // Arguments that are not options are ignored.
        if (option[0] != '-' && option[0] != '/')
        {
            continue;
        }
        // A bare "-" or "/" has no option letter.
        if (option.Length < 2)
        {
            usage();
            return;
        }
        char flag = Char.ToLowerInvariant(option[1]);
        if (flag == 'r')
        {
            // Retrieve all referenced images and text on the same host
            httpGet.RecursiveGet = true;
            continue;
        }
        if (flag != 'u' && flag != 'p' && flag != 's')
        {
            usage();
            return;
        }
        // The remaining options all consume the next argument as their value.
        if (i + 1 >= args.Length)
        {
            usage();
            return;
        }
        string value = args[++i];
        try
        {
            switch (flag)
            {
                case 'u':
                    // URI to download (get)
                    httpGet.ResourceUri = new Uri(value);
                    break;
                case 'p':
                    // Name of proxy server to use
                    httpGet.HttpProxy = new WebProxy(value);
                    break;
                case 's':
                    // Local save path to append to retrieved resources
                    httpGet.LocalSavePath = value;
                    break;
            }
        }
        catch (UriFormatException)
        {
            // The URI or proxy address could not be parsed.
            usage();
            return;
        }
    }
    // Initialize the proxy server and retrieve the resources
    Console.WriteLine("Initializing the proxy server and retrieve the resources...");
    try
    {
        httpGet.GetResource(httpGet.ResourceUri, httpGet.RecursiveGet);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exception occurred: {0}", ex.ToString());
    }
}
/// <summary>
/// Initializes every field with the default used when the matching
/// command-line option is not supplied.
/// </summary>
public HttpGetClass()
{
    // Nothing has been downloaded yet, and following links is opt-in.
    this.RetrievedUri = new ArrayList();
    this.RecursiveGet = false;
    // Save into the current directory and go through the default web proxy.
    this.LocalSavePath = ".";
    this.HttpProxy = WebRequest.DefaultWebProxy;
    // Resource fetched when no URI is given. Other example URIs:
    //   http://www.cuilimg.com/static/v2/images/new/flattened/cuil-home_id.png
    //   http://www.google.com/index.html
    //   http://www.cuil.com/search?q=MFC%20programming%20tutorials
    this.ResourceUri = new Uri("http://www.google.com/intl/en/images/about_logo.gif");
}
/// <summary>
/// Prints the command-line syntax and the list of supported options to the console.
/// </summary>
static void usage()
{
    // The summary line lists every option parsed by Main, including -r.
    Console.WriteLine("Usage: Executable_file_name [-u URI] [-p proxy] [-r] [-s local-path]");
    Console.WriteLine("Available options:");
    Console.WriteLine(" -u URI URI to download (along with linked content)");
    Console.WriteLine(" -p proxy Name of proxy server");
    Console.WriteLine(" -r Retrieve referenced images and text residing");
    Console.WriteLine(" on the same host.");
    Console.WriteLine(" -s local-path Local path to save content at");
    Console.WriteLine();
}
/// <summary>
/// Creates any subdirectories and opens the file for writing to. The directory
/// segments of the URI are mapped to subdirectories of the local save path and the
/// last segment becomes the file name. URIs that name a directory (or only the host)
/// are saved as "default.html".
/// </summary>
/// <param name="localSavePath">Local path to append saved resources paths to</param>
/// <param name="uriName">URI of destination resource being saved</param>
/// <returns>
/// An open, write-only FileStream, or null when the directory or file could not be
/// created (the failure is reported on the console).
/// </returns>
public FileStream CreateFile(string localSavePath, Uri uriName)
{
    FileStream localFile = null;
    try
    {
        string directory = localSavePath;
        string fileName = "default.html";
        bool usingDefaultName = true;
        // Segments ending in '/' are directories (the first one is just the root "/");
        // only the last segment can be a file name.
        foreach (string segment in uriName.Segments)
        {
            string name = ToLocalName(segment);
            if (segment.EndsWith("/", StringComparison.Ordinal))
            {
                if (name.Length > 0)
                    directory = Path.Combine(directory, name);
            }
            else
            {
                fileName = name;
                usingDefaultName = false;
            }
        }
        if (usingDefaultName)
            Console.WriteLine("Else, using defaults...");
        Console.WriteLine("Creating local directory...");
        Directory.CreateDirectory(directory);
        // Open the file to write to
        Console.WriteLine("Opening the file to write to...");
        string saveFileName = Path.Combine(directory, fileName);
        localFile = File.Open(
            saveFileName,
            System.IO.FileMode.Create,
            System.IO.FileAccess.Write,
            System.IO.FileShare.None
        );
        Console.WriteLine("Created File: {0}", saveFileName);
    }
    catch (Exception ex)
    {
        Console.WriteLine("CreateFile failed: {0}", ex.Message);
        Console.WriteLine("Stack:\n{0}", ex.StackTrace);
        // Never hand a closed stream back to the caller.
        if (localFile != null)
        {
            localFile.Close();
            localFile = null;
        }
    }
    return localFile;
}
/// <summary>
/// Converts one URI path segment into a name that is safe to use as a single local
/// directory or file name.
/// </summary>
/// <param name="uriSegment">A segment from Uri.Segments, with or without trailing '/'.</param>
/// <returns>The unescaped name; empty for the root segment "/".</returns>
private static string ToLocalName(string uriSegment)
{
    string name = Uri.UnescapeDataString(uriSegment.TrimEnd('/'));
    // Unescaping can reveal "." / ".." or separator characters (e.g. from %2F);
    // neutralize them so a remote link cannot escape the save directory.
    if (name == "." || name == "..")
        return "_";
    foreach (char invalid in Path.GetInvalidFileNameChars())
        name = name.Replace(invalid, '_');
    return name;
}
/// <summary>
/// Parse the HTML content for any links to images and text/HTML documents that
/// reside on the same host. For each such link that has not been retrieved yet,
/// call the GetResource method to download it (recursively).
/// </summary>
/// <param name="baseUri">URI of resource being parsed for links</param>
/// <param name="httpContent">Text content of retrieved resource</param>
public void ParseHtmlForReferences(Uri baseUri, string httpContent)
{
    string[] match = { "<a href=\"", "src=\"" };
    // Search for '<a href="' and 'src="' strings to indicate links to other resources.
    Console.WriteLine("Searching for \'<a href=\"\' and \'src=\"\' strings to indicate links to other resources...");
    for (int i = 0; i < match.Length; i++)
    {
        int start = 0;
        // Search the entire contents of the buffer for occurrences of each match string
        while (true)
        {
            // Find the next reference; HTML attribute names are case-insensitive.
            start = httpContent.IndexOf(match[i], start, StringComparison.OrdinalIgnoreCase);
            if (start == -1)
                break; // Break if not present
            // Offset start to end of match string (so it points to the resource link)
            start = start + match[i].Length;
            // The link runs up to the closing quote
            int end = httpContent.IndexOf('"', start);
            if (end == -1)
                break;
            string link = httpContent.Substring(start, end - start);
            // Skip links that cannot be resolved instead of aborting the whole parse.
            Uri newUri;
            if (!Uri.TryCreate(baseUri, link, out newUri))
                continue;
            // GetResource issues HTTP requests only.
            if (newUri.Scheme != Uri.UriSchemeHttp && newUri.Scheme != Uri.UriSchemeHttps)
                continue;
            if (!string.Equals(newUri.Host, baseUri.Host, StringComparison.OrdinalIgnoreCase))
                continue;
            if (this.RetrievedUri.Contains(newUri))
                continue;
            try
            {
                // Link is hosted on the same computer, download it
                GetResource(newUri, true);
                Console.WriteLine("\n");
            }
            catch (WebException)
            {
                // GetResource has already reported the failure; one broken link
                // should not stop the remaining links from being retrieved.
            }
        }
    }
}
/// <summary>
/// Retrieve the resource at the given URI through the configured HttpProxy and save
/// it under LocalSavePath. HTML documents are saved as UTF-8 text and, when requested,
/// parsed for further links; every other content type is copied byte for byte.
/// </summary>
/// <param name="getUri">URI to retrieve</param>
/// <param name="recurse">True to also retrieve same-host resources referenced by HTML content</param>
public void GetResource(Uri getUri, bool recurse)
{
    HttpWebResponse httpResponse = null;
    FileStream localFile = null;
    try
    {
        RetrievedUri.Add(getUri);
        Console.WriteLine("Retrieving: {0}", getUri.AbsoluteUri);
        // Create the HTTP request object
        Console.WriteLine("Creating the HTTP request object...");
        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(getUri.AbsoluteUri);
        // Set some HTTP specific headers
        Console.WriteLine("Setting some HTTP specific headers...");
        httpRequest.UserAgent = "My User Agent/1.0";
        // Use the configured proxy (the default proxy unless -p was given)
        Console.WriteLine("If a proxy was specified create an instance of the WebProxy with it...");
        httpRequest.Proxy = HttpProxy;
        // Get the response object
        Console.WriteLine("Getting the response object...");
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
        // Create the file where resource is to be saved
        Console.WriteLine("Creating the file where resource is to be saved...");
        localFile = CreateFile(LocalSavePath, getUri);
        if (localFile == null)
        {
            // CreateFile has already reported why; skip this resource.
            return;
        }
        Console.WriteLine("Response Type: {0}", httpResponse.ContentType);
        string contentType = httpResponse.ContentType ?? string.Empty;
        // The header usually carries parameters ("text/html; charset=utf-8"),
        // so compare only the media-type prefix.
        if (contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
        {
            // If the resource is HTML text, retrieve using the text stream reader.
            Console.WriteLine("If the resource is HTML text, retrieve using the text stream reader...");
            string httpContent;
            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream(), Encoding.UTF8))
            {
                httpContent = reader.ReadToEnd();
            }
            // Write with the same encoding used for reading so non-ASCII text survives.
            byte[] byteContent = Encoding.UTF8.GetBytes(httpContent);
            localFile.Write(byteContent, 0, byteContent.Length);
            // Close the file before recursing: it is no longer needed and the
            // recursive calls open files of their own.
            localFile.Close();
            localFile = null;
            if (recurse)
            {
                ParseHtmlForReferences(getUri, httpContent);
            }
        }
        else
        {
            // Images and every other content type are copied as raw bytes.
            Console.WriteLine("If this resource is a binary, retrieve the content using the binary writer...");
            long totalBytes = httpResponse.ContentLength;
            long byteCount = 0;
            byte[] chunk = new byte[4096];
            // Read the response in 4KB chunks
            Console.Write("Reading the response in 4KB chunks: ");
            using (Stream body = httpResponse.GetResponseStream())
            {
                int read;
                while ((read = body.Read(chunk, 0, chunk.Length)) > 0)
                {
                    localFile.Write(chunk, 0, read);
                    byteCount += read;
                    // Print progress indicator; the length is -1 when the server
                    // does not announce it, so no percentage can be computed then.
                    if (totalBytes > 0)
                    {
                        long progress = (byteCount * 100) / totalBytes;
                        Console.Write(@"{0}%", progress.ToString().PadLeft(2));
                        Console.Write("\b\b\b");
                    }
                }
            }
            Console.WriteLine();
        }
    }
    catch (WebException wex)
    {
        Console.WriteLine("Exception occurred on request: {0}", wex.Message);
        Console.WriteLine("Status code: {0}", wex.Status);
        // Only inspect the error response when this call's own request failed
        // (httpResponse still null). Otherwise the exception came from a nested
        // call that has already reported and closed it.
        if (wex.Status == WebExceptionStatus.ProtocolError && httpResponse == null)
        {
            // If there was a protocol error then the response object is
            // valid but there was an error retrieving the response.
            HttpWebResponse errorResponse = wex.Response as HttpWebResponse;
            if (errorResponse != null)
            {
                Console.WriteLine("\nThe protocol returned was: {0}", errorResponse.StatusCode.ToString());
                errorResponse.Close();
            }
        }
        throw;
    }
    finally
    {
        // Close the resources if still open
        if (localFile != null)
            localFile.Close();
        if (httpResponse != null)
            httpResponse.Close();
    }
}
}
}
Comments