Find the answer to your Linux question:
Results 1 to 6 of 6
alright... I have file A I have file B which is a subset of A i want to strip file B out of file A i.e. A - B = ...
Enjoy an ad free experience by logging in. Not a member yet? Register.
  1. #1
    Just Joined!
    Join Date
    May 2004
    Location
    Pennsylvania
    Posts
    98

    Stripping a subset of text from a file


    alright...
    I have file A
    I have file B which is a subset of A
    i want to strip file B out of file A
    i.e. A - B = C

    This sounds like it should be easy and i feel i'm missing something....
    file B is multiple lines and has some special characters...
    I'm not ready to put this in a perl script just yet... i just have a feeling that there is a simpler way

    tried
    Code:
    cat A |grep -v "`cat B`" > C
    as well as a similar sed command
    I escaped the special chars in B for that as well....
    i think they fail because of the multiple lines....

    I have used
    Code:
     # Diff A against B, then blank out diff's hunk headers (e.g. "1,5d0"),
     # strip the "< " / "> " line prefixes and the "---" separators; result goes to C.
     diff -bi A B | sed -e 's/^[0-9]*,[0-9]*[a-z]*[0-9]*$//g' | sed -e 's/^< //g' | sed -e 's/^> //g'| sed -e 's/---//g' > C
    And this works.... however i don't like it because if B is not in A, then i get a mess....

    Real world application:
    Taking over a webmaster position and the previous webmaster used all static HTML with a long header.... i want to strip the header out and keep the "meat" of the page.... unfortunately not every single one of the headers are exactly the same... there are about 3-4 different versions (hence the reason my diff script would make more of a mess than its worth)

    any suggestions?

  2. #2
    Linux Newbie
    Join Date
    Oct 2004
    Posts
    158
    Code:
    # Sort both files (comm requires sorted input), then compare them line by line.
    # NOTE(review): `comm -1` only suppresses the column of lines unique to A.srt,
    # so what remains is the content of B.srt. `comm -23 A.srt B.srt` is the form
    # that prints the lines found only in A.srt (in sorted, not original, order).
    sort -o A.srt A
    sort -o B.srt B
    comm -1 A.srt B.srt   > FileA_minus_FileB

  3. #3
    Just Joined!
    Join Date
    May 2004
    Location
    Pennsylvania
    Posts
    98
    Thanks for the suggestion...
    but sorting will do no good...
    for one, i need the text to stay in order.
    for two, comm looks basically like a diff, which would leave me in a similar situation to the other script i posted

  4. #4
    Linux Newbie
    Join Date
    Oct 2004
    Posts
    158
    comm in fact does do what you want.
    But since you don't like it, try some C code:

    Code:
    #include <unistd.h>
    #include <fcntl.h>
    #include <errno.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <string.h>
    
    /*
     * ck(x): abort-on-failure check. Evaluates x exactly once; when the result
     * is NULL (or zero), reports the current errno with perror("Error") and
     * exits with EXIT_FAILURE.
     *
     * The do { } while (0) wrapper makes the macro a single statement, so it is
     * safe inside an unbraced if/else, and !(x) works for pointer and integer
     * results alike without comparing an int against NULL.
     */
    #define ck(x) do { \
            if (!(x)) { \
                perror("Error"); \
                exit(EXIT_FAILURE); \
            } \
        } while (0)
    /*
     * Read up to nbyte bytes from fd into buf, retrying after EINTR and after
     * short reads. Passing the file's size as nbyte reads the whole file.
     *
     * Returns the number of bytes stored in buf (fewer than nbyte only when end
     * of file was reached first), or -1 when read() fails with an error other
     * than EINTR. buf is not NUL-terminated by this function.
     */
    ssize_t readall(int fd, void *buf, size_t nbyte)
    {
        char *dst = buf;
        size_t total = 0;   /* unsigned, so it compares cleanly against nbyte */

        while (total < nbyte) {
            ssize_t n = read(fd, dst + total, nbyte - total);

            if (n == -1) {
                if (errno == EINTR)
                    continue;       /* interrupted before any data: retry */
                return -1;
            }
            if (n == 0)
                break;              /* end of file */
            total += (size_t)n;
        }
        return (ssize_t)total;
    }
    
     /* get file size */
    size_t file_size&#40;FILE *in&#41;
    &#123;
    	 struct stat st;
    	 if&#40;fstat&#40;fileno&#40;in&#41;, &st&#41; == &#40;-1&#41;&#41;
    	 &#123;
    	 	perror&#40;"stat error"&#41;;
    	 	exit&#40;EXIT_FAILURE&#41;;
    	 &#125;	
         return st.st_size;
    &#125;
    /*
     * Read the whole file at `path` into a freshly allocated, NUL-terminated
     * buffer and store the number of bytes read in *len_out.
     *
     * Any failure (open, allocation, read, close) prints a message and exits
     * with EXIT_FAILURE, so the returned pointer is never NULL. The caller owns
     * the buffer and must free() it.
     */
    static char *load_file(const char *path, size_t *len_out)
    {
        FILE *in = fopen(path, "r");
        char *buf = NULL;
        size_t len = 0;
        ssize_t got = 0;

        ck(in);                              /* open failed */
        len = file_size(in);
        ck(buf = malloc(len + 1));           /* +1 for the terminating NUL */
        got = readall(fileno(in), buf, len);
        if (got < 0) {
            fprintf(stderr, "file read error\n");
            exit(EXIT_FAILURE);
        }
        buf[got] = '\0';
        if (fclose(in) != 0) {
            fprintf(stderr, "filesystem error\n");
            exit(EXIT_FAILURE);
        }
        *len_out = (size_t)got;
        return buf;
    }

    /*
     * Return 1 when the line_len bytes at `line` occur in `text` as a complete
     * line (delimited by '\n' or by the ends of the buffer), 0 otherwise.
     * Line terminators are not part of the comparison.
     */
    static int contains_line(const char *text, size_t text_len,
                             const char *line, size_t line_len)
    {
        const char *cur = text;
        const char *end = text + text_len;

        while (cur < end) {
            const char *nl = memchr(cur, '\n', (size_t)(end - cur));
            size_t cur_len = nl ? (size_t)(nl - cur) : (size_t)(end - cur);

            if (cur_len == line_len && memcmp(cur, line, line_len) == 0)
                return 1;
            cur = nl ? nl + 1 : end;
        }
        return 0;
    }

    /*
     * argv[1] = FileA  argv[2] = FileB
     *
     * Writes to stdout every line of FileA that does not occur as a whole line
     * in FileB ("A - B"), keeping FileA's original line order. Both files are
     * loaded into memory, so lines may be of any length.
     *
     * Returns 0 on success and EXIT_FAILURE on a usage or output error; file
     * access errors terminate the process inside load_file().
     */
    int main(int argc, char *argv[])
    {
        char *a = NULL;
        char *b = NULL;
        size_t a_len = 0;
        size_t b_len = 0;
        const char *cur = NULL;
        const char *end = NULL;

        if (argc != 3) {
            fprintf(stderr, "usage: %s FileA FileB\n",
                    argc > 0 ? argv[0] : "a_minus_b");
            return EXIT_FAILURE;
        }

        a = load_file(argv[1], &a_len);
        b = load_file(argv[2], &b_len);

        cur = a;
        end = a + a_len;
        while (cur < end) {
            const char *nl = memchr(cur, '\n', (size_t)(end - cur));
            const char *next = nl ? nl + 1 : end;
            size_t line_len = nl ? (size_t)(nl - cur) : (size_t)(end - cur);
            size_t out_len = (size_t)(next - cur);   /* includes the '\n', if any */

            if (!contains_line(b, b_len, cur, line_len)) {   /* not found */
                if (fwrite(cur, 1, out_len, stdout) != out_len) {
                    perror("Error");
                    free(b);
                    free(a);
                    return EXIT_FAILURE;
                }
            }
            cur = next;
        }

        free(b);                             /* release the buffers */
        free(a);

        if (fflush(stdout) != 0) {           /* surface deferred write errors */
            fprintf(stderr, "filesystem error\n");
            return EXIT_FAILURE;
        }
        return 0;                            /* normal return */
    }

  5. #5
    Just Joined!
    Join Date
    May 2004
    Location
    Pennsylvania
    Posts
    98
    Quote Originally Posted by jim mcnamara
    Code:
    sort -o A.srt A
    sort -o B.srt B
    comm -1 A.srt B.srt   > FileA_minus_FileB
    This leaves me with FileA_minus_FileB = B.srt....
    not saying i don't believe you, just saying it doesn't work for me.
    Thanks for the C code too... may have to try it out someday...
    As it turns out i just wrote a perl script to:
    read file A, put the full contents into a single string, using a special character string to represent newlines
    read file B, put full contents into a single string, using the same special character string for newlines
    use a regex replace of string_of_A compared to string_of_B
    write file C using regex replace newline_string with newline in string_of_A

  6. #6
    Just Joined!
    Join Date
    Oct 2005
    Posts
    31

    Are you stripping entire lines? Sounds like a job for Ruby.

    Code:
    # Write every line of file A that does not appear in file B to file C,
    # keeping the original order of file A. The three paths are placeholders.
    #
    # Lines are compared without their trailing newline, so a final line that
    # lacks one still matches.
    blines = File.readlines('/path/fileb').map(&:chomp)
    # Open file C for writing; the block form closes it even if an error occurs.
    File.open('/path/filec', 'w') do |filec|
      # Stream file A line by line and keep only the lines absent from file B.
      File.foreach('/path/filea') do |line|
        filec.puts(line) unless blines.include?(line.chomp)
      end
    end
    I hope that you find the code interesting...

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •