diff --git a/modules/text/samples/webcam_demo.cpp b/modules/text/samples/webcam_demo.cpp
index 6071b3c69..ba9ba9b37 100644
--- a/modules/text/samples/webcam_demo.cpp
+++ b/modules/text/samples/webcam_demo.cpp
@@ -1,21 +1,19 @@
 /*
  * webcam-demo.cpp
  *
- * A demo program of End-to-end Scene Text Detection and Recognition.
+ * A demo program of End-to-end Scene Text Detection and Recognition using webcam, video or a still image.
  *
  * Created on: Jul 31, 2014
  *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
  */
 
 #include "opencv2/text.hpp"
-#include "opencv2/core/utility.hpp"
 #include "opencv2/highgui.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/features2d.hpp"
 
 #include <iostream>
 
-
 using namespace std;
 using namespace cv;
 using namespace cv::text;
@@ -32,7 +30,7 @@ private:
 public:
     Parallel_extractCSER(vector<Mat> &_channels, vector< vector<ERStat> > &_regions,
                          vector<Ptr<ERFilter> >_er_filter1, vector<Ptr<ERFilter> >_er_filter2)
-        : channels(_channels),regions(_regions),er_filter1(_er_filter1),er_filter2(_er_filter2){}
+        : channels(_channels),regions(_regions),er_filter1(_er_filter1),er_filter2(_er_filter2) {}
 
     virtual void operator()( const cv::Range &r ) const
     {
@@ -75,34 +73,81 @@ public:
     Parallel_OCR & operator=(const Parallel_OCR &a);
 };
 
-
 //Discard wrongly recognised strings
 bool   isRepetitive(const string& s);
 //Draw ER's in an image via floodFill
 void   er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation);
 
-//Perform text detection and recognition from webcam
+const char* keys = // option table handed to CommandLineParser in main(); entry format is "{name alias | default | help}"
+{
+    "{@input   | 0 | camera index or video file name}" // positional argument; a single digit is opened as a camera index, anything else as a file name
+    "{ image i |   | specify input image}"             // --image / -i; when non-empty the image is processed instead of opening a capture
+};
+
+//Perform text detection and recognition from webcam, video file or still image
 int main(int argc, char* argv[])
 {
-    cout << endl << argv[0] << endl << endl;
-    cout << "A demo program of End-to-end Scene Text Detection and Recognition using webcam." << endl << endl;
-    cout << "  Usage:  " << argv[0] << " [camera_index]" << endl << endl;
+    CommandLineParser parser(argc, argv, keys);
+
+    cout << "A demo program of End-to-end Scene Text Detection and Recognition using webcam or video." << endl << endl;
+    cout << "  Keys:  " << endl;
     cout << "  Press 'r' to switch between MSER/CSER regions." << endl;
     cout << "  Press 'g' to switch between Horizontal and Arbitrary oriented grouping." << endl;
     cout << "  Press 'o' to switch between OCRTesseract/OCRHMMDecoder recognition." << endl;
     cout << "  Press 's' to scale down frame size to 320x240." << endl;
     cout << "  Press 'ESC' to exit." << endl << endl;
+    parser.printMessage();
+
+    VideoCapture cap;
+    Mat frame, image, gray, out_img;
+    String input = parser.get<String>("@input");
+    String image_file_name = parser.get<String>("image");
+    if (image_file_name != "")
+    {
+        image = imread(image_file_name);
+        if (image.empty())
+        {
+            cout << "\nunable to open " << image_file_name << "\nprogram terminated!\n";
+            return 1;
+        }
+        else
+        {
+            cout << "\nimage " << image_file_name << " loaded!\n";
+            frame = image.clone();
+        }
+    }
+    else
+    {
+        cout << "\nInitializing capturing... ";
+        // Cast before isdigit(): passing a negative plain char is undefined behaviour.
+        if (input.size() == 1 && isdigit(static_cast<unsigned char>(input[0])))
+            cap.open(input[0] - '0');
+        else
+            cap.open(input);
+
+        // The first frame is displayed before the main loop, so grabbing it must succeed as well.
+        if (!cap.isOpened() || !cap.read(frame))
+        {
+            cout << "\nCould not initialize capturing!\n";
+            return 1;
+        }
+
+        cout << " Done!" << endl;
+    }
 
     namedWindow("recognition",WINDOW_NORMAL);
+    imshow("recognition", frame);
+    waitKey(1);
+
     bool downsize = false;
     int  REGION_TYPE = 1;
     int  GROUPING_ALGORITHM = 0;
     int  RECOGNITION = 0;
-    char *region_types_str[2] = {const_cast<char *>("ERStats"), const_cast<char *>("MSER")};
-    char *grouping_algorithms_str[2] = {const_cast<char *>("exhaustive_search"), const_cast<char *>("multioriented")};
-    char *recognitions_str[2] = {const_cast<char *>("Tesseract"), const_cast<char *>("NM_chain_features + KNN")};
 
-    Mat frame,grey,orig_grey,out_img;
+    String region_types_str[2] = {"ERStats", "MSER"};
+    String grouping_algorithms_str[2] = {"exhaustive_search", "multioriented"};
+    String recognitions_str[2] = {"Tesseract", "NM_chain_features + KNN"};
+
     vector<Mat> channels;
     vector<vector<ERStat> > regions(2); //two channels
 
@@ -118,15 +163,13 @@ int main(int argc, char* argv[])
         er_filters2.push_back(er_filter2);
     }
 
-    //double t_r = getTickCount();
-
     //Initialize OCR engine (we initialize 10 instances in order to work several recognitions in parallel)
-    cout << "Initializing OCR engines ..." << endl;
+    cout << "Initializing OCR engines ... ";
     int num_ocrs = 10;
     vector< Ptr<OCRTesseract> > ocrs;
     for (int o=0; o<num_ocrs; o++)
     {
-      ocrs.push_back(OCRTesseract::create());
+        ocrs.push_back(OCRTesseract::create());
     }
 
     Mat transition_p;
@@ -140,26 +183,12 @@ int main(int argc, char* argv[])
     vector< Ptr<OCRHMMDecoder> > decoders;
     for (int o=0; o<num_ocrs; o++)
     {
-      decoders.push_back(OCRHMMDecoder::create(loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz"),
-                                               voc, transition_p, emission_p));
+        decoders.push_back(OCRHMMDecoder::create(loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz"),
+                           voc, transition_p, emission_p));
     }
     cout << " Done!" << endl;
 
-    //cout << "TIME_OCR_INITIALIZATION_ALT = "<< ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
-
-
-    int cam_idx = 0;
-    if (argc > 1)
-        cam_idx = atoi(argv[1]);
-
-    VideoCapture cap(cam_idx);
-    if(!cap.isOpened())
-    {
-        cout << "ERROR: Cannot open default camera (0)." << endl;
-        return -1;
-    }
-
-    while (cap.read(frame))
+    while ( true )
     {
         double t_all = (double)getTickCount();
 
@@ -167,93 +196,65 @@ int main(int argc, char* argv[])
             resize(frame,frame,Size(320,240));
 
         /*Text Detection*/
-
-        cvtColor(frame,grey,COLOR_RGB2GRAY);
-        grey.copyTo(orig_grey);
+        cvtColor(frame,gray,COLOR_BGR2GRAY);
         // Extract channels to be processed individually
         channels.clear();
-        channels.push_back(grey);
-        channels.push_back(255-grey);
-
+        channels.push_back(gray);
+        channels.push_back(255-gray);
 
         regions[0].clear();
         regions[1].clear();
-        //double t_d = (double)getTickCount();
 
         switch (REGION_TYPE)
         {
-        case 0:
-        {
-            parallel_for_(cv::Range(0,(int)channels.size()), Parallel_extractCSER(channels,regions,er_filters1,er_filters2));
+        case 0: // ERStats
+            parallel_for_(cv::Range(0, (int)channels.size()), Parallel_extractCSER(channels, regions, er_filters1, er_filters2));
             break;
-        }
-        case 1:
-        {
-            //Extract MSER
+        case 1: // MSER
             vector<vector<Point> > contours;
             vector<Rect> bboxes;
-            Ptr<MSER> mser = MSER::create(21,(int)(0.00002*grey.cols*grey.rows),(int)(0.05*grey.cols*grey.rows),1,0.7);
-            mser->detectRegions(grey, contours, bboxes);
+            Ptr<MSER> mser = MSER::create(21, (int)(0.00002*gray.cols*gray.rows), (int)(0.05*gray.cols*gray.rows), 1, 0.7);
+            mser->detectRegions(gray, contours, bboxes);
 
             //Convert the output of MSER to suitable input for the grouping/recognition algorithms
             if (contours.size() > 0)
-                MSERsToERStats(grey, contours, regions);
-
+                MSERsToERStats(gray, contours, regions);
             break;
         }
-        case 2:
-        {
-            break;
-        }
-        }
-        //cout << "TIME_REGION_DETECTION_ALT = " << ((double)getTickCount() - t_d)*1000/getTickFrequency() << endl;
 
         // Detect character groups
-        //double t_g = getTickCount();
         vector< vector<Vec2i> > nm_region_groups;
         vector<Rect> nm_boxes;
         switch (GROUPING_ALGORITHM)
         {
-        case 0:
-        {
+        case 0: // exhaustive_search
             erGrouping(frame, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_HORIZ);
             break;
-        }
-        case 1:
-        {
+        case 1: //multioriented
             erGrouping(frame, channels, regions, nm_region_groups, nm_boxes, ERGROUPING_ORIENTATION_ANY, "./trained_classifier_erGrouping.xml", 0.5);
             break;
         }
-        }
-        //cout << "TIME_GROUPING_ALT = " << ((double)getTickCount() - t_g)*1000/getTickFrequency() << endl;
-
-
-
 
         /*Text Recognition (OCR)*/
 
-
-        frame.copyTo(out_img);
-        int scale = downsize ? 2 : 1;
-        float scale_img  = (float)((600.f/frame.rows)/scale);
-        float scale_font = (float)(2-scale_img)/1.4f;
+        int bottom_bar_height = frame.rows/7; // derive from the current frame: out_img is empty on the first pass and afterwards still holds the previous bar
+        copyMakeBorder(frame, out_img, 0, bottom_bar_height, 0, 0, BORDER_CONSTANT, Scalar(150, 150, 150)); // append a grey status bar below the frame
+        float scale_font = (float)(bottom_bar_height /85.0); // font scale follows the bar height
         vector<string> words_detection;
         float min_confidence1 = 0.f, min_confidence2 = 0.f;
 
         if (RECOGNITION == 0)
         {
-          min_confidence1 = 51.f; min_confidence2 = 60.f;
+            min_confidence1 = 51.f;
+            min_confidence2 = 60.f;
         }
 
         vector<Mat> detections;
 
-        //t_r = getTickCount();
-
         for (int i=0; i<(int)nm_boxes.size(); i++)
         {
             rectangle(out_img, nm_boxes[i].tl(), nm_boxes[i].br(), Scalar(255,255,0),3);
 
-
             Mat group_img = Mat::zeros(frame.rows+2, frame.cols+2, CV_8UC1);
             er_draw(channels, regions, nm_region_groups[i], group_img);
             group_img(nm_boxes[i]).copyTo(group_img);
@@ -268,27 +269,25 @@ int main(int argc, char* argv[])
         // parallel process detections in batches of ocrs.size() (== num_ocrs)
         for (int i=0; i<(int)detections.size(); i=i+(int)num_ocrs)
         {
-          Range r;
-          if (i+(int)num_ocrs <= (int)detections.size())
-            r = Range(i,i+(int)num_ocrs);
-          else
-            r = Range(i,(int)detections.size());
+            Range r;
+            if (i+(int)num_ocrs <= (int)detections.size())
+                r = Range(i,i+(int)num_ocrs);
+            else
+                r = Range(i,(int)detections.size());
 
-          switch(RECOGNITION)
-          {
-            case 0:
-              parallel_for_(r, Parallel_OCR<OCRTesseract>(detections, outputs, boxes, words, confidences, ocrs));
-              break;
-            case 1:
-              parallel_for_(r, Parallel_OCR<OCRHMMDecoder>(detections, outputs, boxes, words, confidences, decoders));
-              break;
-          }
+            switch(RECOGNITION)
+            {
+            case 0: // Tesseract
+                parallel_for_(r, Parallel_OCR<OCRTesseract>(detections, outputs, boxes, words, confidences, ocrs));
+                break;
+            case 1: // NM_chain_features + KNN
+                parallel_for_(r, Parallel_OCR<OCRHMMDecoder>(detections, outputs, boxes, words, confidences, decoders));
+                break;
+            }
         }
 
-
         for (int i=0; i<(int)detections.size(); i++)
         {
-
             outputs[i].erase(remove(outputs[i].begin(), outputs[i].end(), '\n'), outputs[i].end());
             //cout << "OCR output = \"" << outputs[i] << "\" length = " << outputs[i].size() << endl;
             if (outputs[i].size() < 3)
@@ -311,56 +310,57 @@ int main(int argc, char* argv[])
                 rectangle(out_img, boxes[i][j].tl()-Point(3,word_size.height+3), boxes[i][j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1);
                 putText(out_img, words[i][j], boxes[i][j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font));
             }
-
         }
 
-        //cout << "TIME_OCR_ALT = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl;
-
-
         t_all = ((double)getTickCount() - t_all)*1000/getTickFrequency();
-        char buff[100];
-        sprintf(buff, "%2.1f Fps. @ %dx%d", (float)(1000/t_all), out_img.cols, out_img.rows);
-        string fps_info = buff;
-        rectangle(out_img, Point( out_img.rows-(160/scale),out_img.rows-(70/scale) ), Point(out_img.cols,out_img.rows), Scalar(255,255,255),-1);
-        putText(out_img, fps_info, Point( 10,out_img.rows-(10/scale) ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0));
-        putText(out_img, region_types_str[REGION_TYPE], Point( out_img.rows-(150/scale),out_img.rows-(50/scale) ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0));
-        putText(out_img, grouping_algorithms_str[GROUPING_ALGORITHM], Point( out_img.rows-(150/scale),out_img.rows-(30/scale) ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0));
-        putText(out_img, recognitions_str[RECOGNITION], Point( out_img.rows-(150/scale),out_img.rows-(10/scale) ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0));
-
+        int text_thickness = 1+(out_img.rows/500);
+        string fps_info = format("%2.1f Fps. %dx%d", (float)(1000 / t_all), frame.cols, frame.rows);
+        putText(out_img, fps_info, Point( 10,out_img.rows-5 ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0), text_thickness);
+        putText(out_img, region_types_str[REGION_TYPE], Point((int)(out_img.cols*0.5), out_img.rows - (int)(bottom_bar_height / 1.5)), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0), text_thickness);
+        putText(out_img, grouping_algorithms_str[GROUPING_ALGORITHM], Point((int)(out_img.cols*0.5),out_img.rows-((int)(bottom_bar_height /3)+4) ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0), text_thickness);
+        putText(out_img, recognitions_str[RECOGNITION], Point((int)(out_img.cols*0.5),out_img.rows-5 ), FONT_HERSHEY_DUPLEX, scale_font, Scalar(255,0,0), text_thickness);
 
         imshow("recognition", out_img);
-        //imwrite("recognition_alt.jpg", out_img);
-        int key = waitKey(30);
-        if (key == 27) //wait for key
+
+        if ((image_file_name == "") && !cap.read(frame))
         {
-            cout << "esc key pressed" << endl;
+            cout << "Capturing ended! press any key to exit." << endl;
+            waitKey();
+            return 0;
+        }
+
+        int key = waitKey(30); //wait for a key press
+
+        switch (key)
+        {
+        case 27: //ESC
+            cout << "ESC key pressed and exited." << endl;
+            return 0;
+        case 32: //SPACE
+            imwrite("recognition_alt.jpg", out_img);
+            break;
+        case 103: //'g'
+            GROUPING_ALGORITHM = (GROUPING_ALGORITHM+1)%2;
+            cout << "Grouping switched to " << grouping_algorithms_str[GROUPING_ALGORITHM] << endl;
+            break;
+        case 111: //'o'
+            RECOGNITION = (RECOGNITION+1)%2;
+            cout << "OCR switched to " << recognitions_str[RECOGNITION] << endl;
+            break;
+        case 114: //'r'
+            REGION_TYPE = (REGION_TYPE+1)%2;
+            cout << "Regions switched to " << region_types_str[REGION_TYPE] << endl;
+            break;
+        case 115: //'s'
+            downsize = !downsize;
+            if (!image.empty())
+            {
+                frame = image.clone();
+            }
+            break;
+        default:
             break;
         }
-        else
-        {
-            switch (key)
-            {
-            case 103: //g
-                GROUPING_ALGORITHM = (GROUPING_ALGORITHM+1)%2;
-                cout << "Grouping switched to " << grouping_algorithms_str[GROUPING_ALGORITHM] << endl;
-                break;
-            case 111: //o
-                RECOGNITION = (RECOGNITION+1)%2;
-                cout << "OCR switched to " << recognitions_str[RECOGNITION] << endl;
-                break;
-            case 114: //r
-                REGION_TYPE = (REGION_TYPE+1)%2;
-                cout << "Regions switched to " << region_types_str[REGION_TYPE] << endl;
-                break;
-            case 115: //s
-                downsize = !downsize;
-                break;
-            default:
-                break;
-
-            }
-        }
-
     }
 
     return 0;
@@ -389,11 +389,9 @@ bool isRepetitive(const string& s)
         return true;
     }
 
-
     return false;
 }
 
-
 void er_draw(vector<Mat> &channels, vector<vector<ERStat> > &regions, vector<Vec2i> group, Mat& segmentation)
 {
     for (int r=0; r<(int)group.size(); r++)