Monday, December 2, 2013

Regular Expressions (RE) In ORACLE

We have the following regular expressions in oracle

1.      REGEXP_LIKE
2.      REGEXP_REPLACE
3.      REGEXP_INSTR
4.      REGEXP_SUBSTR
5.      REGEXP_COUNT (in 11g)

RE’s are POSIX (Portable Operating System Interface Standardization Effort) compliant.

create table address_mah ( addr varchar2(100))

insert into address_mah values ('123 4th St.');
insert into address_mah values ('4 Maple Ct.');
insert into address_mah values ('2167 Greenbrier Blvd.');
insert into address_mah values ('33 Third St.');
insert into address_mah values ('One First Drive');
insert into address_mah values ('1664 1/2 Springhill Ave');
insert into address_mah values ('2003 Geaux Illini Dr.');

create table test_clob_mah ( num number(3), ch clob);

insert into test_clob_mah values (1, 'A simple line of text');
insert into test_clob_mah values (2, 'This line contains two lines of text;
 it includes a carriage return/line feed');

Parameters:
            i to ignore case
            c to match case
  n to make the meta character dot symbol match new lines as well as             other characters
            m to make the meta characters ^ and $

In RE-Oracle, we have the following
            ^      -       A caret Symbol, Matches the beginning of a string
            $       -      Matches the end of a string
            .       -       The period matches anything and is called “match                                any character”
            |        -        Alternation operator, works as like OR
          {}         -       Repeat operator



REGEXP_INSTR:
It returns a number signifying where a pattern begins.

select regexp_instr ('Mahendra', 'a',7)
from dual

SQL to display address where vowel followed by an ‘r’ and ‘p’

select addr, regexp_instr (addr, '[aeiou][rp]')
from address_mah
where  regexp_instr (addr, '[aeiou][rp]') > 0

Ranges (Minus Signs)

SQL for the letters ‘a’ through ‘j’ followed by ‘n’

select addr, regexp_instr (addr, '[a-j]n' ) where_is_it
from address_mah
where regexp_instr (addr, '[a-j]n') > 0



REGEXP_LIKE:

It returns a Boolean to signify the existence of a pattern.

SQL to display addresses in which ‘g’ or ‘p’ used

select *
from address_mah
where addr like '%g%' or addr like '%p%'

or  lets use regexp_like

select *
from address_mah
where regexp_like (addr,'[gp]') 


REGEXP_SUBSTR:
It returns part of a string

 SQL> select regexp_substr ('Yababa dababa do','a.a')
  2  from dual;

REG                                                                             
---                                                                             
aba                                                                             

SQL>  -- * matches zero or more occurences, + one or more occurences
SQL> 
SQL> select regexp_substr ('Yababa dababa do','a.*a')
  2  from dual;

REGEXP_SUBST                                                                    
------------                                                                    
ababa dababa                                                                    

SQL> select regexp_substr ('Yababa dababa do','a.+a')
  2  from dual;

REGEXP_SUBST                                                                    
------------                                                                    
ababa dababa  

REGEXP_REPLACE:
It returns a string with part of it replaced

SQL> 
SQL> select replace ('This is a test','t','XYZ') from dual;

REPLACE('THISISATE                                                              
------------------                                                              
This is a XYZesXYZ                                                              

SQL> 
SQL> select regexp_replace ('This is a test','t.+t','XYZ') from dual;

REGEXP_REPLAC                                                                   
-------------                                                                   
This is a XYZ                                                                   

SQL> select addr, regexp_substr (addr,'ird|irs')
  2  from address_mah; 


REGEXP_COUNT:

Returns number
Helps to count the given pattern In a string

DROP TABLE t1;
CREATE TABLE t1 (
  data VARCHAR2(50)
);
INSERT INTO t1 VALUES ('1234');
INSERT INTO t1 VALUES ('1234 1234');
INSERT INTO t1 VALUES ('1234 1234 1234');
COMMIT;

select data, regexp_count(data, '[1-4]{4}') col1, regexp_count(data,'[[:digit:]]{4}') col2, regexp_count(data,'\d{4}') col3, regexp_count(data,'\d{4}.\d{4}.\d{4}') col4
from
t1

Examples:

select 'ArtADB1234567e9876540', regexp_substr('ArtADB1234567e9876540','[A-Z][a-z]+',1,1) col1,
regexp_substr('ArtADB1234567e9876540','[A-Z]+',1,2) col2,
regexp_substr('ArtADB1234567e9876540','[0-9]+') col3,
regexp_substr('ArtADB1234567e9876540','\d+') col4,
regexp_substr('ArtADB1234567e9876540','[a-z]+') col5,
regexp_substr('ArtADB1234567e9876540','[a-z]+',1,2) col6,
regexp_substr('ArtADB1234567e9876540','\d+',1,2) col7
from dual


DROP TABLE t1;
CREATE TABLE t1 (
  data VARCHAR2(50)
);

INSERT INTO t1 VALUES ('978/955086/GZ120804/10-FEB-12');
INSERT INTO t1 VALUES ('97/95508/BANANA/10-FEB-12');
INSERT INTO t1 VALUES ('97/95508/"APPLE"/10-FEB-12');
COMMIT;


select data, regexp_substr (data,'[^/]+') col1, regexp_substr (data,'[^/]+',1,1) col2, regexp_substr (data,'[^/]+',1,2) col3,
regexp_substr (data,'[^/]+',1,3) col4 , regexp_substr (data,'[^/]+',1,4) col5, regexp_substr (data,'[^/"]+',1,3) col6
from t1


DROP TABLE t1;
CREATE TABLE t1 (
  data VARCHAR2(50)
);

INSERT INTO t1 VALUES ('SocialSecurityNumber');
INSERT INTO t1 VALUES ('HouseNumber');
COMMIT;

select data, regexp_substr (data, '[A-Z][a-z]+') col1, regexp_replace (data, '([A-Z])',' \1',2 ) col2,
regexp_replace (data, '([A-Z])',' ' ) col3, regexp_replace (data, '([A-Z])',' ',2 ) col4,
regexp_replace (data, '([A-Z])',' #',2 ) col5, regexp_replace (data, '([A-Z])','#',1 ) col6
from t1


DROP TABLE t1;
CREATE TABLE t1 (
  data VARCHAR2(50)
);

INSERT INTO t1 VALUES ('1 01:01:01');
INSERT INTO t1 VALUES ('.2 02:02:02');
INSERT INTO t1 VALUES ('..3 03:03:03');
COMMIT;
select data, regexp_instr (data, '[0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}') col1, regexp_replace (data, '\.') col2,
regexp_replace (data,'\:','-') col3, regexp_replace (data,'\:','-') col4, regexp_replace (regexp_replace (data,'\:','-'),'\.') col5
from t1


create table oecust_test_mah as select customer_no, ship_to_name, BILL_TO_ORDER_ACKMT_EMAIL as email1, BILL_TO_SHPMT_NOTIF_EMAIL as email2,
BILL_TO_PRICE_CHK_EMAIL_ADDR as email3
from oecustomer_shipping


SQL to display customer names having numbers only

select distinct ship_to_name
 from OECUST_TEST_MAH
where  regexp_like (ship_to_name, '^[[:digit:]]+$')

select distinct ship_to_name
 from OECUST_TEST_MAH
where  regexp_like (ship_to_name, '^[[:digit:] ]+$') – space is there in between ] ]

SQL to display customer names with alphanumeric values only

select * from
OECUST_TEST_MAH
where  regexp_like (ship_to_name, '[[:alpha:]]')

Note: The result contains numbers, characters, special characters like -, # etc…

SQL to display customer names with special characters

select ship_to_name, count(ship_to_name) over ()
 from oecust_test_mah
where   regexp_like(ship_to_name, '[/.~`!@#$%^&*-_,;:|]' )

SQL to validate email address

select customer_no,ship_to_name, email1,   regexp_substr(email1,'[a-z A-Z 0-9]+@[a-z A-Z 0-9]+\.[a-z A-Z 0-9]{2,4}') col2, count(*) over ()
 from oecust_test_mah
where email1 is not null


select customer_no,ship_to_name, email1,   regexp_substr(email1,'[a-z A-Z 0-9 \.]+@[a-z A-Z 0-9]+\.[a-z A-Z 0-9\.]+') col2, count(*) over ()
 from oecust_test_mah
where email1 is not null
and email1 !=  regexp_substr(email1,'[a-z A-Z 0-9 \. \_]+@[a-z A-Z 0-9]+\.[a-z A-Z 0-9 \.]+')
and not regexp_like (email1,'[,;-]')

Customer Names with space in their name

select ship_to_name, count(*) over ()
 from oecust_test_mah
where  regexp_like (ship_to_name,'[[:space:]]')

SQL to validate email id’s ends with .com or not

select  distinct count(*) over (),o.email1, regexp_substr(email1,'\.[a-z A-Z]{3}$')
 from oecust_test_mah o
where email1 is not null
and  regexp_like (email1,'\.[a-z A-Z]{3}$')

OR

select  distinct count(*) over (),o.email1, regexp_substr(email1,'\.[[:alpha:]]{3}$')
 from oecust_test_mah o
where email1 is not null
and  regexp_like (email1,'\.[[:alpha:]]{3}$')


select case when
regexp_like('Super' || chr(13) || 'Star' ,'[[:cntrl:]]')
then 'Match Found'
else 'No Match Found'
end
as output from dual;

select regexp_replace('Help     Earth           Stay         Green','[[:blank:]]{2,15}',' ')
from dual

OR

select regexp_replace('Help     Earth           Stay         Green','[[:space:]]{2,15}',' ')
from dual


select regexp_substr('1PSN/231_3253/ABc','[[:alnum:]]+')
from dual
1PSN

select regexp_substr('1PSN/231_3253/ABc','[[:alpha:]]+')
from dual
PSN

select regexp_substr('1PSN/231_3253/ABc','[[:digit:]]+')
from dual
1

select case when
regexp_like('987-65-4321' ,'^[0-9]{3}-[0-9]{2}-[0-9]{4}$')
then 'Match Found'
else 'No Match Found'
end
as output from dual;


SELECT REGEXP_REPLACE('04099661234','([[:digit:]]{3})([[:digit:]]{4})([[:digit:]]{4})','(\1) ') as Formatted_Phone
FROM dual;

(040)

SELECT REGEXP_REPLACE('04099661234','([[:digit:]]{3})','(\1) ') as Formatted_Phone
FROM dual;
(040) (996) (612) 34

select regexp_replace ('ramamahendra326','([[:alpha:]]{4})([[:alpha:]]{8})([[:digit:]]{3})','\1 ---> \2 --> \3')
from dual
rama ---> mahendra --> 326

SELECT REGEXP_REPLACE('04099661234','([[:digit:]]{3})([[:digit:]]{4})([[:digit:]]{4})','(\1) \2-\3') as Formatted_Phone
FROM dual;
(040) 9966-1234



No comments:

Post a Comment